Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
297a1698
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
297a1698
编写于
6月 14, 2018
作者:
W
wanghaoshuang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix doc of warpctc, array_read, edit_distance and sequence_reshape.
上级
e0a8c584
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
522 addition
and
283 deletion
+522
-283
python/paddle/fluid/layers/control_flow.py
python/paddle/fluid/layers/control_flow.py
+39
-24
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+483
-259
未找到文件。
python/paddle/fluid/layers/control_flow.py
浏览文件 @
297a1698
...
...
@@ -13,7 +13,7 @@
# limitations under the License.
import
contextlib
from
layer_function_generator
import
autodoc
from
layer_function_generator
import
autodoc
,
templatedoc
from
tensor
import
assign
,
fill_constant
from
..
import
core
from
..framework
import
Program
,
Variable
,
Operator
...
...
@@ -721,26 +721,22 @@ def lod_rank_table(x, level=0):
return
table
@
templatedoc
()
def
max_sequence_len
(
rank_table
):
"""Max Sequence Len Operator. Given a LoDRankTable object, this layer
returns the max length of a batch of sequences. In fact, a LoDRankTable
object contains a list of tuples(<sequence index, sequence length>) and
the list is already sorted by sequence length in descending order, so the
operator just returns the sequence length of the first tuple element.
"""
${comment}
>>> import paddle.fluid as fluid
>>> x = fluid.layers.data(name='x', shape=[10], dtype='float32',
>>> lod_level=1)
>>> rank_table = layers.lod_rank_table(x=x, level=0)
>>> max_seq_len = layers.max_sequence_len(rank_table)
Args:
rank_table
(Variable): Input variable which is a LoDRankTable object
.
rank_table
(${rank_table_type}): ${rank_table_comment}
.
Returns:
Variable: The max length of sequence.
Examples:
.. code-block:: python
x = fluid.layers.data(name='x', shape=[10],
dtype='float32', lod_level=1)
rank_table = layers.lod_rank_table(x=x, level=0)
max_seq_len = layers.max_sequence_len(rank_table)
${out_comment}.
"""
helper
=
LayerHelper
(
"max_seqence_len"
,
**
locals
())
res
=
helper
.
create_tmp_variable
(
dtype
=
"int64"
)
...
...
@@ -978,19 +974,38 @@ def equal(x, y, cond=None, **ignored):
def
array_read
(
array
,
i
):
"""This function performs the operation to read the data in as an
"""
This function performs the operation to read the data in as an
LOD_TENSOR_ARRAY.
.. code-block:: text
Given:
array = [0.6, 0.1, 0.3, 0.1]
And:
i = 2
Then:
output = 0.3
Args:
array (Variable|list): The input tensor that
will be written to an array
.
i (Variable|list): The
subscript index in tensor array, that points the
place where data will be written to.
array (Variable|list): The input tensor that
stores the data to be read
.
i (Variable|list): The
index of the data to be read from input array.
Returns:
Variable: The tensor type variable that has the data written to it.
Examples:
.. code-block::python
tmp = fluid.layers.zeros(shape=[10], dtype='int32')
i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
arr = layers.array_read(tmp, i=i)
.. code-block:: python
tmp = fluid.layers.zeros(shape=[10], dtype='int32')
i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
arr = layers.array_read(tmp, i=i)
"""
helper
=
LayerHelper
(
'array_read'
,
**
locals
())
if
not
isinstance
(
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
297a1698
...
...
@@ -12,78 +12,33 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
All layers just related to the neural network.
All layers just related to the neural network.
"""
from
..layer_helper
import
LayerHelper
from
..initializer
import
Normal
,
Constant
from
..framework
import
Variable
from
..param_attr
import
ParamAttr
from
layer_function_generator
import
autodoc
from
layer_function_generator
import
autodoc
,
templatedoc
from
tensor
import
concat
import
utils
import
random
__all__
=
[
'fc'
,
'embedding'
,
'dynamic_lstm'
,
'dynamic_lstmp'
,
'dynamic_gru'
,
'gru_unit'
,
'linear_chain_crf'
,
'crf_decoding'
,
'cos_sim'
,
'cross_entropy'
,
'square_error_cost'
,
'chunk_eval'
,
'sequence_conv'
,
'conv2d'
,
'sequence_pool'
,
'sequence_softmax'
,
'softmax'
,
'pool2d'
,
'batch_norm'
,
'beam_search_decode'
,
'conv2d_transpose'
,
'sequence_expand'
,
'lstm_unit'
,
'reduce_sum'
,
'reduce_mean'
,
'reduce_max'
,
'reduce_min'
,
'reduce_prod'
,
'sequence_first_step'
,
'sequence_last_step'
,
'dropout'
,
'split'
,
'ctc_greedy_decoder'
,
'edit_distance'
,
'l2_normalize'
,
'matmul'
,
'topk'
,
'warpctc'
,
'sequence_reshape'
,
'transpose'
,
'im2sequence'
,
'nce'
,
'beam_search'
,
'row_conv'
,
'multiplex'
,
'layer_norm'
,
'softmax_with_cross_entropy'
,
'smooth_l1'
,
'one_hot'
,
'autoincreased_step_counter'
,
'reshape'
,
'lod_reset'
,
'lrn'
,
'pad'
,
'label_smooth'
,
'roi_pool'
,
'dice_loss'
,
'resize_bilinear'
,
'gather'
,
'random_crop'
,
'fc'
,
'embedding'
,
'dynamic_lstm'
,
'dynamic_lstmp'
,
'dynamic_gru'
,
'gru_unit'
,
'linear_chain_crf'
,
'crf_decoding'
,
'cos_sim'
,
'cross_entropy'
,
'square_error_cost'
,
'chunk_eval'
,
'sequence_conv'
,
'conv2d'
,
'sequence_pool'
,
'sequence_softmax'
,
'softmax'
,
'pool2d'
,
'batch_norm'
,
'beam_search_decode'
,
'conv2d_transpose'
,
'sequence_expand'
,
'lstm_unit'
,
'reduce_sum'
,
'reduce_mean'
,
'reduce_max'
,
'reduce_min'
,
'reduce_prod'
,
'sequence_first_step'
,
'sequence_last_step'
,
'dropout'
,
'split'
,
'ctc_greedy_decoder'
,
'edit_distance'
,
'l2_normalize'
,
'matmul'
,
'topk'
,
'warpctc'
,
'sequence_reshape'
,
'transpose'
,
'im2sequence'
,
'nce'
,
'beam_search'
,
'row_conv'
,
'multiplex'
,
'layer_norm'
,
'softmax_with_cross_entropy'
,
'smooth_l1'
,
'one_hot'
,
'autoincreased_step_counter'
,
'reshape'
,
'lod_reset'
,
'lrn'
,
'pad'
,
'label_smooth'
,
'roi_pool'
,
'dice_loss'
,
'image_resize'
,
'image_resize_short'
,
'resize_bilinear'
,
'gather'
,
'random_crop'
,
'mean_iou'
]
...
...
@@ -92,7 +47,6 @@ def fc(input,
num_flatten_dims
=
1
,
param_attr
=
None
,
bias_attr
=
None
,
use_cudnn
=
False
,
use_mkldnn
=
False
,
act
=
None
,
is_test
=
False
,
...
...
@@ -219,6 +173,7 @@ def embedding(input,
have two elements which indicate the size of the dictionary of
embeddings and the size of each embedding vector respectively.
is_sparse(bool): The flag indicating whether to use sparse update.
is_distributed (bool): Whether to run lookup table from remote parameter server.
padding_idx(int|long|None): If :attr:`None`, it makes no effect to lookup.
Otherwise the given :attr:`padding_idx` indicates padding the output
with zeros whenever lookup encounters it in :attr:`input`. If
...
...
@@ -258,9 +213,10 @@ def embedding(input,
return
tmp
# TODO(qijun): expose H0 and C0
def
dynamic_lstm
(
input
,
size
,
h_0
=
None
,
c_0
=
None
,
param_attr
=
None
,
bias_attr
=
None
,
use_peepholes
=
True
,
...
...
@@ -321,6 +277,13 @@ def dynamic_lstm(input,
(T X 4D), where T is the total time steps in this
mini-batch, D is the hidden size.
size(int): 4 * hidden size.
h_0(Variable): The initial hidden state is an optional input, default is zero.
This is a tensor with shape (N x D), where N is the
batch size and D is the hidden size.
c_0(Variable): The initial cell state is an optional input, default is zero.
This is a tensor with shape (N x D), where N is the
batch size. `h_0` and `c_0` can be NULL but only at the same time.
param_attr(ParamAttr|None): The parameter attribute for the learnable
hidden-hidden weights.
...
...
@@ -384,12 +347,20 @@ def dynamic_lstm(input,
cell
=
helper
.
create_tmp_variable
(
dtype
)
batch_gate
=
helper
.
create_tmp_variable
(
dtype
)
batch_cell_pre_act
=
helper
.
create_tmp_variable
(
dtype
)
inputs
=
{
'Input'
:
input
,
'Weight'
:
weight
,
'Bias'
:
bias
}
batch_size
=
input
.
shape
[
0
]
if
h_0
:
assert
h_0
.
shape
==
(
batch_size
,
size
),
\
'The shape of h0 should be (batch_size, %d)'
%
size
inputs
[
'H0'
]
=
h_0
if
c_0
:
assert
c_0
.
shape
==
(
batch_size
,
size
),
\
'The shape of c0 should be (batch_size, %d)'
%
size
inputs
[
'C0'
]
=
c_0
helper
.
append_op
(
type
=
'lstm'
,
inputs
=
{
'Input'
:
input
,
'Weight'
:
weight
,
'Bias'
:
bias
},
inputs
=
inputs
,
outputs
=
{
'Hidden'
:
hidden
,
'Cell'
:
cell
,
...
...
@@ -651,8 +622,9 @@ def dynamic_gru(input,
:attr:`False`.
gate_activation(str): The activation for update gate and reset gate.
Choices = ["sigmoid", "tanh", "relu", "identity"], default "sigmoid".
activation(str): The activation for candidate hidden state.
candidate_
activation(str): The activation for candidate hidden state.
Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh".
h_0 (Variable): The hidden output of the first time step.
Returns:
Variable: The hidden state of GRU. The shape is :math:`(T
\\
times D)`,
\
...
...
@@ -673,11 +645,13 @@ def dynamic_gru(input,
attr
=
helper
.
param_attr
,
shape
=
[
size
,
3
*
size
],
dtype
=
dtype
)
bias
=
helper
.
create_parameter
(
attr
=
helper
.
bias_attr
,
shape
=
[
1
,
3
*
size
],
dtype
=
dtype
,
is_bias
=
True
)
batch_size
=
input
.
shape
[
0
]
inputs
=
{
'Input'
:
input
,
'Weight'
:
weight
,
'Bias'
:
bias
}
if
h_0
!=
None
:
assert
h_0
.
shape
==
(
size
,
size
),
'The shape of h0 should be(%d, %d)'
%
(
size
,
size
)
inputs
[
'h0'
]
=
h_0
batch_size
,
size
),
'The shape of h0 should be(batch_size, %d)'
%
size
inputs
[
'H0'
]
=
h_0
hidden
=
helper
.
create_tmp_variable
(
dtype
)
batch_gate
=
helper
.
create_tmp_variable
(
dtype
)
...
...
@@ -799,7 +773,22 @@ def gru_unit(input,
return
updated_hidden
,
reset_hidden_pre
,
gate
@
templatedoc
()
def
linear_chain_crf
(
input
,
label
,
param_attr
=
None
):
"""
Linear Chain CRF.
${comment}
Args:
input(${emission_type}): ${emission_comment}
label(${label_type}): ${label_comment}
param_attr(ParamAttr): The attribute of the learnable parameter.
Returns:
${log_likelihood_comment}
"""
helper
=
LayerHelper
(
'linear_chain_crf'
,
**
locals
())
size
=
input
.
shape
[
1
]
transition
=
helper
.
create_parameter
(
...
...
@@ -825,7 +814,19 @@ def linear_chain_crf(input, label, param_attr=None):
return
log_likelihood
@
templatedoc
()
def
crf_decoding
(
input
,
param_attr
,
label
=
None
):
"""
${comment}
Args:
input(${emission_type}): ${emission_comment}
param_attr(ParamAttr): The parameter attribute for training.
label(${label_type}): ${label_comment}
Returns:
${viterbi_path_comment}
"""
helper
=
LayerHelper
(
'crf_decoding'
,
**
locals
())
transition
=
helper
.
get_parameter
(
param_attr
.
name
)
viterbi_path
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
())
...
...
@@ -843,6 +844,13 @@ def cos_sim(X, Y):
"""
This function performs the cosine similarity between two tensors
X and Y and returns that as the output.
Args:
X (Variable): The input X.
Y (Variable): The input Y.
Returns:
Variable: the output of cosine(X, Y).
"""
helper
=
LayerHelper
(
'cos_sim'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
X
.
dtype
)
...
...
@@ -869,15 +877,15 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None):
unchanged.
Args:
x(v
ariable): The input tensor.
dropout_prob
(float): Probability of setting units to zero.
is_test
(bool): A flag indicating whether it is in test phrase or not.
seed
(int): A Python integer used to create random seeds. If this
parameter is set to None, a random seed is used.
NOTE: If an integer seed is given, always the same output
units will be dropped. DO NOT use a fixed seed in training.
name
(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
x (V
ariable): The input tensor.
dropout_prob
(float): Probability of setting units to zero.
is_test
(bool): A flag indicating whether it is in test phase or not.
seed
(int): A Python integer used to create random seeds. If this
parameter is set to None, a random seed is used.
NOTE: If an integer seed is given, always the same output
units will be dropped. DO NOT use a fixed seed in training.
name
(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: A tensor variable.
...
...
@@ -999,8 +1007,8 @@ def square_error_cost(input, label):
* :math:`Out`: Output value, same shape with :math:`X`.
Args:
input
(Variable): Input tensor, has predictions.
label
(Variable): Label tensor, has target labels.
input
(Variable): Input tensor, has predictions.
label
(Variable): Label tensor, has target labels.
Returns:
Variable: The tensor variable storing the element-wise squared error
\
...
...
@@ -1029,6 +1037,7 @@ def square_error_cost(input, label):
return
square_out
@
templatedoc
()
def
chunk_eval
(
input
,
label
,
chunk_scheme
,
...
...
@@ -1037,6 +1046,18 @@ def chunk_eval(input,
"""
This function computes and outputs the precision, recall and
F1-score of chunk detection.
Args:
input (Variable): prediction output of the network.
label (Variable): label of the test data set.
chunk_scheme (str): ${chunk_scheme_comment}
num_chunk_types (int): ${num_chunk_types_comment}
excluded_chunk_types (list): ${excluded_chunk_types_comment}
Returns:
tuple: tuple containing: (precision, recall, f1_score,
num_infer_chunks, num_label_chunks,
num_correct_chunks)
"""
helper
=
LayerHelper
(
"chunk_eval"
,
**
locals
())
...
...
@@ -1069,6 +1090,7 @@ def chunk_eval(input,
num_correct_chunks
)
@
templatedoc
()
def
sequence_conv
(
input
,
num_filters
,
filter_size
=
3
,
...
...
@@ -1081,6 +1103,19 @@ def sequence_conv(input,
This function creates the op for sequence_conv, using the inputs and
other convolutional configurations for the filters and stride as given
in the input parameters to the function.
Args:
input (Variable): ${x_comment}
num_filters (int): number of filters.
filter_size (int): the filter size (H and W).
filter_stride (int): stride of the filter.
padding (bool): if True, add paddings.
bias_attr (ParamAttr|None): attributes for bias
param_attr (ParamAttr|None): attributes for parameter
act (str): the activation type
Returns:
Variable: output of sequence_conv
"""
# FIXME(dzh) : want to unify the argument of python layer
...
...
@@ -1180,48 +1215,49 @@ def conv2d(input,
- Input:
Input shape:
$(N, C_{in}, H_{in}, W_{in})$
Input shape:
:math:`(N, C_{in}, H_{in}, W_{in})`
Filter shape:
$(C_{out}, C_{in}, H_f, W_f)$
Filter shape:
:math:`(C_{out}, C_{in}, H_f, W_f)`
- Output:
Output shape:
$(N, C_{out}, H_{out}, W_{out})$
Output shape:
:math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H_{out}&=
\\
frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1
\\\\
W_{out}&=
\\
frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
H_{out}&=
\\
frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1
\\\\
W_{out}&=
\\
frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
Args:
input(Variable): The input image with [N, C, H, W] format.
num_filters(int): The number of filter. It is as same as the output
image channel.
filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_H, filter_size_W).
Otherwise, the filter will be a square.
stride(int|tuple): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: stride = 1.
padding(int|tuple): The padding size. If padding is a tuple, it must
contain two integers, (padding_H, padding_W). Otherwise, the
padding_H = padding_W = padding. Default: padding = 0.
dilation(int|tuple): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: dilation = 1.
groups(int): The groups number of the Conv2d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: groups=1
param_attr(ParamAttr): The parameters to the Conv2d Layer. Default: None
bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None
use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
act(str): Activation type. Default: None
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
input (Variable): The input image with [N, C, H, W] format.
num_filters(int): The number of filters. It is the same as the output
image channel.
filter_size (int|tuple|None): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_H, filter_size_W).
Otherwise, the filter will be a square.
stride (int|tuple): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: stride = 1.
padding (int|tuple): The padding size. If padding is a tuple, it must
contain two integers, (padding_H, padding_W). Otherwise, the
padding_H = padding_W = padding. Default: padding = 0.
dilation (int|tuple): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: dilation = 1.
groups (int): The groups number of the Conv2d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: groups=1
param_attr (ParamAttr): The parameters to the Conv2d Layer. Default: None
bias_attr (ParamAttr): Bias parameter for the Conv2d layer. Default: None
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
use_mkldnn (bool): Use mkldnn kernels or not.
act (str): Activation type. Default: None
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: The tensor variable storing the convolution and
\
...
...
@@ -1379,7 +1415,7 @@ def sequence_pool(input, pool_type):
def
sequence_first_step
(
input
):
"""
This func
iton get
the first step of sequence.
This func
tion gets
the first step of sequence.
.. code-block:: text
...
...
@@ -1412,7 +1448,7 @@ def sequence_first_step(input):
def
sequence_last_step
(
input
):
"""
This func
iton get
the last step of sequence.
This func
tion gets
the last step of sequence.
.. code-block:: text
...
...
@@ -1456,6 +1492,22 @@ def pool2d(input,
"""
This function adds the operator for pooling in 2 dimensions, using the
pooling configurations mentioned in input parameters.
Args:
input (Variable): ${input_comment}
pool_size (int): ${ksize_comment}
pool_type (str): ${pooling_type_comment}
pool_stride (int): stride of the pooling layer.
pool_padding (int): padding size.
global_pooling (bool): ${global_pooling_comment}
use_cudnn (bool): ${use_cudnn_comment}
ceil_mode (bool): ${ceil_mode_comment}
use_mkldnn (bool): ${use_mkldnn_comment}
name (str): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: output of pool2d layer.
"""
if
pool_type
not
in
[
"max"
,
"avg"
]:
raise
ValueError
(
...
...
@@ -1513,6 +1565,25 @@ def batch_norm(input,
"""
This function helps create an operator to implement
the BatchNorm layer using the configurations from the input parameters.
Args:
input (Variable): the input variable.
act (str): activation type
is_test (bool): whether to run batch_norm as test mode.
momentum (float): momentum
epsilon (float): epsilon, default 1e-05
param_attr (ParamAttr|None): attributes for parameter
bias_attr (ParamAttr|None): attributes for bias
data_layout (str): data layout, default NCHW
in_place (bool): if True, do not create tmp variable
use_mkldnn (bool): ${use_mkldnn_comment}
name (str): The name of this layer. It is optional.
moving_mean_name (str): The name of moving mean variable name, optional.
moving_variance_name (str): The name of moving variance name, optional.
do_model_average_for_mean_and_var (bool):
Returns:
Variable: output of batch_norm layer.
"""
helper
=
LayerHelper
(
'batch_norm'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
...
...
@@ -1640,6 +1711,7 @@ def layer_norm(input,
bias_attr(ParamAttr|None): The parameter attribute for the learnable
bias :math:`b`.
act(str): Activation to be applied to the output of layer normalization.
name (str): The name of this layer. It is optional.
Returns:
Variable: A tensor variable with the same shape as the input.
...
...
@@ -1691,6 +1763,17 @@ def layer_norm(input,
def
beam_search_decode
(
ids
,
scores
,
name
=
None
):
"""
${beam_search_decode}
Args:
ids (Variable): ${ids_comment}
scores (Variable): ${scores_comment}
name (str): The name of this layer. It is optional.
Returns:
tuple: a tuple of two output variable: sentence_ids, sentence_scores
"""
helper
=
LayerHelper
(
'beam_search_decode'
,
**
locals
())
sentence_ids
=
helper
.
create_tmp_variable
(
dtype
=
ids
.
dtype
)
sentence_scores
=
helper
.
create_tmp_variable
(
dtype
=
ids
.
dtype
)
...
...
@@ -1766,46 +1849,46 @@ def conv2d_transpose(input,
W_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1
Args:
input(Variable): The input image with [N, C, H, W] format.
num_filters(int): The number of the filter. It is as same as the output
image channel.
output_size(int|tuple|None): The output image size. If output size is a
tuple, it must contain two integers, (image_H, image_W). This
parameter only works when filter_size is None.
filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_H, filter_size_W).
Otherwise, the filter will be a square. None if use output size to
calculate filter_size.
padding(int|tuple): The padding size. If padding is a tuple, it must
contain two integers, (padding_H, padding_W). Otherwise, the
padding_H = padding_W = padding. Default: padding = 0.
stride(int|tuple): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: stride = 1.
dilation(int|tuple): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: dilation = 1.
groups(int): The groups number of the Conv2d transpose layer. Inspired by
grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
when group=2, the first half of the filters is only connected to the
first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels.
Default: groups=1
param_attr(ParamAttr): The parameters to the Conv2d_transpose Layer.
Default: None
bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None
use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
act(str): Activation type. Default: None
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
input(Variable): The input image with [N, C, H, W] format.
num_filters(int): The number of the filter. It is as same as the output
image channel.
output_size(int|tuple|None): The output image size. If output size is a
tuple, it must contain two integers, (image_H, image_W). This
parameter only works when filter_size is None.
filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_H, filter_size_W).
Otherwise, the filter will be a square. None if use output size to
calculate filter_size.
padding(int|tuple): The padding size. If padding is a tuple, it must
contain two integers, (padding_H, padding_W). Otherwise, the
padding_H = padding_W = padding. Default: padding = 0.
stride(int|tuple): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: stride = 1.
dilation(int|tuple): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: dilation = 1.
groups(int): The groups number of the Conv2d transpose layer. Inspired by
grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
when group=2, the first half of the filters is only connected to the
first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels.
Default: groups=1
param_attr(ParamAttr): The parameters to the Conv2d_transpose Layer.
Default: None
bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None
use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
act(str): Activation type. Default: None
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: The tensor variable storing the convolution transpose result.
Variable: The tensor variable storing the convolution transpose result.
Raises:
ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch.
ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch.
Examples:
.. code-block:: python
...
...
@@ -1942,6 +2025,17 @@ def sequence_expand(x, y, ref_level=-1, name=None):
def
beam_search
(
pre_ids
,
ids
,
scores
,
beam_size
,
end_id
,
level
=
0
):
'''
This function implements the beam search algorithm.
Args:
pre_ids (Variable): ${pre_ids_comment}
ids (Variable): ${ids_comment}
scores (Variable): ${scores_comment}
beam_size (int): ${beam_size_comment}
end_id (int): ${end_id_comment}
level (int): ${level_comment}
Returns:
tuple: a tuple of beam_search output variables: selected_ids, selected_scores
'''
helper
=
LayerHelper
(
'beam_search'
,
**
locals
())
score_type
=
scores
.
dtype
...
...
@@ -2437,19 +2531,21 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
The l2 normalize layer normalizes `x` along dimension `axis` using an L2
norm. For a 1-D tensor (`dim` is fixed to 0), this layer computes
output = x / sqrt(max(sum(x**2), epsilon))
.. math::
y = \\frac{x}{\\sqrt{\\sum {x^2} + epsilon}}
For `x` with more dimensions, this layer independently normalizes each 1-D
slice along dimension `axis`.
Args:
x(Variable|list): The input tensor to l2_normalize layer.
axis(int): Dimension along which to normalize the input.
epsilon(float): A lower bound value for `x`'s l2 norm. sqrt(epsilon) will
be used as the divisor if the l2 norm of `x` is less than
sqrt(epsilon).
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
x(Variable|list): The input tensor to l2_normalize layer.
axis(int): The axis on which to apply normalization. If `axis < 0`,
the dimension to normalization is rank(X) + axis. -1 is the
last dimension.
epsilon(float): The epsilon value is used to avoid division by zero,
the default value is 1e-12.
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
...
...
@@ -2468,46 +2564,17 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
axis
=
0
helper
=
LayerHelper
(
"l2_normalize"
,
**
locals
())
square
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
type
=
"square"
,
inputs
=
{
"X"
:
x
},
outputs
=
{
"Out"
:
square
})
reduced_sum
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
norm
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
type
=
"reduce_sum"
,
inputs
=
{
"X"
:
square
},
outputs
=
{
"Out"
:
reduced_sum
},
type
=
"norm"
,
inputs
=
{
"X"
:
x
},
outputs
=
{
"Out"
:
out
,
"Norm"
:
norm
},
attrs
=
{
"dim"
:
[
1
]
if
axis
is
None
else
[
axis
],
"keep_dim"
:
True
,
"reduce_all"
:
False
"axis"
:
1
if
axis
is
None
else
axis
,
"epsilon"
:
epsilon
,
})
# TODO(caoying) A lower bound value epsilon for the norm is needed to
# imporve the numeric stability of reciprocal. This requires a maximum_op.
rsquare
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
type
=
"reciprocal"
,
inputs
=
{
"X"
:
reduced_sum
},
outputs
=
{
"Out"
:
rsquare
})
# TODO(caoying) the current elementwise_mul operator does not support a
# general broadcast rule which broadcasts input(Y) to have the same
# dimension with Input(X) starting from a specified dimension. So this
# exanpsion is requred. Once a general broadcast rule is spported, this
# expanding canbe removed.
rsquare_expanded
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
expand_times
=
[
1
]
*
len
(
x
.
shape
)
expand_times
[
axis
]
=
int
(
x
.
shape
[
axis
])
helper
.
append_op
(
type
=
"expand"
,
inputs
=
{
"X"
:
rsquare
},
outputs
=
{
"Out"
:
rsquare_expanded
},
attrs
=
{
"expand_times"
:
expand_times
})
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
type
=
"elementwise_mul"
,
inputs
=
{
"X"
:
x
,
"Y"
:
rsquare_expanded
},
outputs
=
{
"Out"
:
out
})
return
out
...
...
@@ -2666,8 +2733,7 @@ def topk(input, k, name=None):
return
values
,
indices
def
edit_distance
(
input
,
label
,
normalized
=
True
,
ignored_tokens
=
None
,
name
=
None
):
def
edit_distance
(
input
,
label
,
normalized
=
True
,
ignored_tokens
=
None
):
"""
EditDistance operator computes the edit distances between a batch of
hypothesis strings and their references. Edit distance, also called
...
...
@@ -2681,26 +2747,23 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None,
"kitten" -> "sitten" -> "sittin" -> "sitting"
Input(Hyps)
is a LoDTensor consisting of all the hypothesis strings with
The input
is a LoDTensor consisting of all the hypothesis strings with
the total number denoted by `batch_size`, and the separation is specified
by the LoD information. And the `batch_size` reference strings are arranged
in order in the same way in the
LoDTensor Input(Refs)
.
in order in the same way in the
input LoDTensor
.
Output(Out)
contains the `batch_size` results and each stands for the edit
The output
contains the `batch_size` results and each stands for the edit
distance for a pair of strings respectively. If Attr(normalized) is true,
the edit distance will be divided by the length of reference string.
Args:
input(Variable): The indices for hypothesis strings.
label(Variable): The indices for reference strings.
normalized(bool): Indicated whether to normalize the edit distance by
normalized(bool, default True): Indicates whether to normalize the edit distance by
the length of reference string.
ignored_tokens(list of int): Tokens that should be removed before
ignored_tokens(list<int>, default None): Tokens that should be removed before
calculating edit distance.
name (str): The name of this layer. It is optional.
Returns:
Variable: sequence-to-sequence edit distance in shape [batch_size, 1].
...
...
@@ -2710,7 +2773,6 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None,
x = fluid.layers.data(name='x', shape=[8], dtype='float32')
y = fluid.layers.data(name='y', shape=[7], dtype='float32')
cost = fluid.layers.edit_distance(input=x,label=y)
"""
helper
=
LayerHelper
(
"edit_distance"
,
**
locals
())
...
...
@@ -2790,10 +2852,10 @@ def ctc_greedy_decoder(input, blank, name=None):
where Lp is the sum of all input sequences' length and
num_classes is the true number of classes. (not
including the blank label).
blank(int): the blank label index of Connectionist Temporal
Classification (CTC) loss, which is in the half-opened
interval [0, num_classes + 1).
name (str): The name of this layer. It is optional.
Returns:
Variable: CTC greedy decode result. If all the sequences in result were
...
...
@@ -2830,35 +2892,33 @@ def warpctc(input, label, blank=0, norm_by_times=False):
input tensor.
Args:
input(Variable): (LodTensor, default: LoDTensor<float>),
the unscaled probabilities of variable-length sequences,
input (Variable): The unscaled probabilities of variable-length sequences,
which is a 2-D Tensor with LoD information.
Its shape is [Lp, num_classes + 1], where Lp is the sum of all input
sequences' length and num_classes is the true number of classes.
(not including the blank label).
label(Variable): (LodTensor, default: LoDTensor<int>), the ground truth
of variable-length sequence, which is a 2-D Tensor with LoD
information. It is of the shape [Lg, 1], where Lg is th sum of
all labels' length.
blank: (int, default: 0), the blank label index of Connectionist
label (Variable): The ground truth of variable-length sequence,
which is a 2-D Tensor with LoD information. It is of the shape [Lg, 1],
where Lg is the sum of all labels' length.
blank (int, default 0): The blank label index of Connectionist
Temporal Classification (CTC) loss, which is in the
half-opened interval [0, num_classes + 1).
norm_by_times
: (bool, default: false), whether to normalize
the gradients by the number of time-step, which is also the
sequence's length. There is no need to normalize the gradients
if warpctc layer was
follewed by a mean_op.
norm_by_times
(bool, default false): Whether to normalize the gradients
by the number of time-step, which is also the sequence's length.
There is no need to normalize the gradients if warpctc layer was
followed by a mean_op.
Returns:
Variable: The Connectionist Temporal Classification (CTC) loss,
which is a 2-D Tensor of the shape [batch_size, 1].
Examples:
.. code-block:: python
y = layers.data(
name='y', shape=[11, 8], dtype='float32', lod_level=1)
y_predict = layers.data(
name='y_predict', shape=[11, 1], dtype='float32')
cost = layers.warpctc(input=y_predict, label=y)
label = layers.data(shape=[11, 8], dtype='float32', lod_level=1)
predict = layers.data(shape=[11, 1], dtype='float32')
cost = layers.warpctc(input=predict, label=label)
"""
helper
=
LayerHelper
(
'warpctc'
,
**
locals
())
...
...
@@ -2888,16 +2948,21 @@ def sequence_reshape(input, new_dim):
x is a LoDTensor:
x.lod = [[0, 2, 6]]
x.data = [[1, 2], [3, 4],
[5, 6], [7, 8], [9, 10], [11, 12]]
x.data = [[1, 2], [3, 4],
[5, 6], [7, 8],
[9, 10], [11, 12]]
x.dims = [6, 2]
set new_dim = 4
then out is a LoDTensor:
out.lod = [[0, 1, 3]]
out.data = [[1, 2, 3, 4],
[5, 6, 7, 8], [9, 10, 11, 12]]
out.data = [[1, 2, 3, 4],
[5, 6, 7, 8],
[9, 10, 11, 12]]
out.dims = [3, 4]
Currently, only 1-level LoDTensor is supported and please make sure
...
...
@@ -2905,18 +2970,18 @@ def sequence_reshape(input, new_dim):
no remainder for each sequence.
Args:
input (Variable): (LodTensor, default: LoDTensor<float>), a 2-D LoDTensor
with shape being [N, M] where M for dimension.
new_dim (int): New dimension
which
the input LoDTensor is reshaped to.
input (Variable): A 2-D LoDTensor
with shape being [N, M] where M for dimension.
new_dim (int): New dimension
that
the input LoDTensor is reshaped to.
Returns:
Variable: Reshaped LoDTensor according to new dimension.
Examples:
.. code-block:: python
x = fluid.layers.data(name='x', shape=[5, 20],
dtype='float32', lod_level=1)
x = fluid.layers.data(shape=[5, 20], dtype='float32', lod_level=1)
x_reshaped = layers.sequence_reshape(input=x, new_dim=10)
"""
helper
=
LayerHelper
(
'sequence_reshape'
,
**
locals
())
...
...
@@ -2929,7 +2994,10 @@ def sequence_reshape(input, new_dim):
return
out
@
autodoc
()
# FIXME(wuyi): let docstring_checker.py understand @autodoc.
# For now, the comments in c++ use types like Tensor, but in python side
# the type is often "Variable", and arguments may vary.
@
templatedoc
(
op_type
=
"nce"
)
def
nce
(
input
,
label
,
num_total_classes
,
...
...
@@ -2937,6 +3005,21 @@ def nce(input,
param_attr
=
None
,
bias_attr
=
None
,
num_neg_samples
=
None
):
"""
${comment}
Args:
input (Variable): input variable.
label (Variable): label.
num_total_classes (int):${num_total_classes_comment}
sample_weight (int): ${sample_weight_comment}
param_attr (ParamAttr|None): attributes for parameter
bias_attr (ParamAttr|None): attributes for bias
num_neg_samples (int): ${num_neg_samples_comment}
Returns:
Variable: output of nce layer.
"""
helper
=
LayerHelper
(
'nce'
,
**
locals
())
assert
isinstance
(
input
,
Variable
)
dim
=
input
.
shape
[
1
]
...
...
@@ -2994,8 +3077,9 @@ def transpose(x, perm, name=None):
perm[i]-th dimension of `input`.
Args:
input (Variable): (Tensor), A Tensor.
perm (list): A permutation of the dimensions of `input`.
x (Variable): The input Tensor.
perm (list): A permutation of the dimensions of `input`.
name (str): The name of this layer. It is optional.
Returns:
Variable: A transposed Tensor.
...
...
@@ -3228,9 +3312,9 @@ def multiplex(inputs, index):
row of the matrix, then `O[i]` is equal to :math:`I_{ID[i]}[i]`.
Args:
inputs (list): A list of variables to gather from. All variables have the
inputs (list): A list of variables to gather from. All variables have the
same shape and the rank is at least 2.
index (Variable): Tensor<int32>, index variable which is a 2-D tensor
index (Variable): Tensor<int32>, index variable which is a 2-D tensor
with shape [M, 1] where M is the batch size.
Returns:
...
...
@@ -3429,7 +3513,8 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1):
begin(int): The first value of this counter.
step(int): The increment step between each execution.
Returns(Variable): The global run counter.
Returns:
Variable: The global run counter.
"""
helper
=
LayerHelper
(
'global_step_counter'
)
if
counter_name
is
None
:
...
...
@@ -3490,7 +3575,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
the corresponding dimension of x.
Args:
input
(variable): The input tensor.
x
(variable): The input tensor.
shape(list): The new shape. At most one dimension of the new shape can
be -1.
actual_shape(variable): An optional input. If provided, reshape
...
...
@@ -3502,8 +3587,10 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
inplace(bool): If this flag is set true, a new output tensor is created
whose data is copied from input x, otherwise the output
shares data with input without copying.
name (str): The name of this layer. It is optional.
Returns(variable): The output tensor.
Returns:
Variable: The output tensor.
Examples:
.. code-block:: python
...
...
@@ -3929,22 +4016,25 @@ def dice_loss(input, label, epsilon=0.00001):
return
reduce_mean
(
dice_score
)
def
resize_bilinear
(
input
,
out_shape
=
None
,
scale
=
None
,
name
=
None
):
def
image_resize
(
input
,
out_shape
=
None
,
scale
=
None
,
name
=
None
,
resample
=
'BILINEAR'
):
"""
The mathematical meaning of resize bilinear layer is
Bilinear interpolation.
Bilinear interpolation is an extension of linear interpolation for
interpolating functions of two variables (e.g. H-direction and
W-direction in this layer) on a rectilinear 2D grid.
Resize a batch of images.
For details, please refer to Wikipedia:
https://en.wikipedia.org/wiki/Bilinear_interpolation
The input must be a tensor of the shape (num_batches, channels, in_h, in_w),
and the resizing only applies on the last two dimensions (height and width).
Supporting resample methods:
'BILINEAR' : Bilinear interpolation
Args:
input (Variable): The input tensor of
resize bilinear
layer,
input (Variable): The input tensor of
image resize
layer,
This is a 4-D tensor of the shape
(num_batches, channels, in_h, in_w).
out_shape(list|tuple|Variable|None): Output shape of
resize bilinear
out_shape(list|tuple|Variable|None): Output shape of
image resize
layer, the shape is (out_h, out_w).
Default: None
scale(float|None): The multiplier for the input height or width.
...
...
@@ -3953,6 +4043,8 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None):
Default: None
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
resample(str): The resample method. It can only be 'BILINEAR' currently.
Default: 'BILINEAR'
Returns:
out (Variable): The output is a 4-D tensor of the shape
...
...
@@ -3961,8 +4053,12 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None):
Examples:
.. code-block:: python
out = fluid.layers.
resize_bilinear
(input, out_shape=[12, 12])
out = fluid.layers.
image_resize
(input, out_shape=[12, 12])
"""
resample_methods
=
{
'BILINEAR'
:
'bilinear_interp'
}
if
resample
not
in
resample_methods
:
raise
ValueError
(
"The 'resample' of image_resize can only be 'BILINEAR' currently."
)
if
out_shape
is
None
and
scale
is
None
:
raise
ValueError
(
"One of out_shape and scale must not be None"
)
helper
=
LayerHelper
(
'bilinear_interp'
,
**
locals
())
...
...
@@ -3990,7 +4086,7 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None):
out
=
helper
.
create_tmp_variable
(
dtype
)
helper
.
append_op
(
type
=
"bilinear_interp"
,
type
=
resample_methods
[
resample
]
,
inputs
=
inputs
,
outputs
=
{
"Out"
:
out
},
attrs
=
{
"out_h"
:
out_h
,
...
...
@@ -3998,6 +4094,62 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None):
return
out
@templatedoc(op_type="bilinear_interp")
def resize_bilinear(input, out_shape=None, scale=None, name=None):
    """
    ${comment}

    Args:
        input(${x_type}): ${x_comment}.

        out_shape(${out_size_type}): ${out_size_comment}.

        scale(float|None): The multiplier for the input height or width. At
             least one of out_shape or scale must be set. And out_shape has
             a higher priority than scale. Default: None.

        name(str|None): The output variable name.

    Returns:
        ${out_comment}.
    """
    # Bilinear resizing is image_resize with the resample method pinned to
    # 'BILINEAR'; every other argument is forwarded unchanged.
    return image_resize(
        input=input,
        out_shape=out_shape,
        scale=scale,
        name=name,
        resample='BILINEAR')
def image_resize_short(input, out_short_len, resample='BILINEAR'):
    """
    Resize a batch of images so that their short edge equals
    'out_short_len'. The long edge is scaled by the same ratio, so the
    length-width ratio of the images stays constant.

    Args:
        input (Variable): The input tensor of image resize layer,
                          This is a 4-D tensor of the shape
                          (num_batches, channels, in_h, in_w).
        out_short_len(int): The length of output images' short edge.
        resample (str): resample method, default: BILINEAR.

    Returns:
        out (Variable): The output is a 4-D tensor of the shape
                        (num_batches, channels, out_h, out_w).

    Raises:
        ValueError: If the rank of input is not 4.
    """
    dims = input.shape
    if len(dims) != 4:
        raise ValueError(
            "The rank of input must be 4 (num_batches, channels, in_h, in_w).")

    # Only the trailing (in_h, in_w) pair takes part in the resize.
    spatial = dims[2:4]
    short_idx = spatial.index(min(spatial))
    # With exactly two spatial entries, the other index is the long edge.
    long_idx = 1 - short_idx

    ratio = float(out_short_len) / float(spatial[short_idx])
    target = list(spatial)
    target[short_idx] = out_short_len
    # Adding 0.5 before int() truncation rounds non-negative values to the
    # nearest integer.
    target[long_idx] = int(float(spatial[long_idx]) * ratio + 0.5)

    return image_resize(input=input, out_shape=target, resample=resample)
def
gather
(
input
,
index
):
"""
Output is obtained by gathering entries of the outer-most dimension
...
...
@@ -4005,7 +4157,7 @@ def gather(input, index):
.. math::
Out = X[Index]
Out = X[Index]
.. code-block:: text
...
...
@@ -4013,8 +4165,8 @@ def gather(input, index):
Given:
X = [[1, 2],
[3, 4],
X = [[1, 2],
[3, 4],
[5, 6]]
Index = [1, 2]
...
...
@@ -4032,6 +4184,7 @@ def gather(input, index):
output (Variable): The output is a tensor with the same rank as input.
Examples:
.. code-block:: python
output = fluid.layers.gather(x, index)
...
...
@@ -4047,10 +4200,31 @@ def gather(input, index):
return
out
def
random_crop
(
input
,
shape
,
seed
=
1
):
@
templatedoc
()
def
random_crop
(
x
,
shape
,
seed
=
None
):
"""
${comment}
Examples:
>>> img = fluid.layers.data("img", [3, 256, 256])
>>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224])
Args:
x(${x_type}): ${x_comment}
shape(${shape_type}): ${shape_comment}
seed(int|${seed_type}|None): ${seed_comment} By default, the seed will
get from `random.randint(-65536, 65535)`.
Returns:
${out_comment}
"""
helper
=
LayerHelper
(
"random_crop"
,
**
locals
())
dtype
=
helper
.
input_dtype
()
out
=
helper
.
create_tmp_variable
(
dtype
)
if
seed
is
None
:
seed
=
random
.
randint
(
-
65536
,
65535
)
if
isinstance
(
seed
,
int
):
seed_value
=
seed
seed
=
helper
.
create_tmp_variable
(
dtype
=
"int64"
)
...
...
@@ -4069,9 +4243,59 @@ def random_crop(input, shape, seed=1):
seed_out
=
helper
.
create_tmp_variable
(
dtype
=
"int64"
)
helper
.
append_op
(
type
=
"random_crop"
,
inputs
=
{
"X"
:
input
,
inputs
=
{
"X"
:
x
,
"Seed"
:
seed
},
outputs
=
{
"Out"
:
out
,
"SeedOut"
:
seed_out
},
attrs
=
{
"shape"
:
shape
})
return
out
def mean_iou(input, label, num_classes):
    """
    Mean Intersection-Over-Union is a common evaluation metric for
    semantic image segmentation, which first computes the IOU for each
    semantic class and then computes the average over classes.
    IOU is defined as follows:

    .. math::

        IOU = true_positive / (true_positive + false_positive + false_negative).

    The predictions are accumulated in a confusion matrix and mean-IOU
    is then calculated from it.

    Args:
        input (Variable): A Tensor of prediction results for semantic labels
                          with type int32 or int64.
        label (Variable): A Tensor of ground truth labels with type int32 or
                          int64. Its shape should be the same as input.
        num_classes (int): The number of classes; it is forwarded to the
                           operator as the `num_classes` attribute.

    Returns:
        mean_iou (Variable): A Tensor representing the mean
                             intersection-over-union with shape [1].
        out_wrong(Variable): A Tensor with shape [num_classes]. The wrong
                             numbers of each class.
        out_correct(Variable): A Tensor with shape [num_classes]. The correct
                               numbers of each class.

    Examples:

        .. code-block:: python

            iou, wrongs, corrects = fluid.layers.mean_iou(predict, label, num_classes)
    """
    # locals() must be captured before any other local is bound so that the
    # helper only receives the three call arguments.
    helper = LayerHelper('mean_iou', **locals())
    # The result is not used below; the call is kept as in the original
    # (presumably it checks the input dtype -- TODO confirm).
    helper.input_dtype()

    # Temporaries are created in the same order as before: mean, wrong, correct.
    iou_var = helper.create_tmp_variable(dtype='float32')
    wrong_var = helper.create_tmp_variable(dtype='int32')
    correct_var = helper.create_tmp_variable(dtype='int32')

    op_inputs = {"predictions": input, "labels": label}
    op_outputs = {
        "out_mean_iou": iou_var,
        "out_wrong": wrong_var,
        "out_correct": correct_var
    }
    helper.append_op(
        type="mean_iou",
        inputs=op_inputs,
        outputs=op_outputs,
        attrs={"num_classes": num_classes})
    return iou_var, wrong_var, correct_var
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录