提交 b0205fdc 编写于 作者: C chengduozh

fix other layer

test=develop
上级 458271c3
...@@ -23,7 +23,7 @@ paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wai ...@@ -23,7 +23,7 @@ paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wai
paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')) paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)) paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False))
paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.DistributeTranspilerConfig.__init__ paddle.fluid.DistributeTranspilerConfig.__init__
paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None)) paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None))
paddle.fluid.ParallelExecutor.run ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True)) paddle.fluid.ParallelExecutor.run ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True))
paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ExecutionStrategy) -> None paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ExecutionStrategy) -> None
...@@ -95,8 +95,8 @@ paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_ti ...@@ -95,8 +95,8 @@ paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_ti
paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.transpose ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.transpose ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.im2sequence ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None)) paddle.fluid.layers.im2sequence ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None))
paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples'], varargs=None, keywords=None, defaults=(None, None, None, None)) paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, None))
paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'name'], varargs=None, keywords=None, defaults=(0, None)) paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'name'], varargs=None, keywords=None, defaults=(0, None))
paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None)
...@@ -312,7 +312,7 @@ paddle.fluid.transpiler.HashName.reset ArgSpec(args=['self'], varargs=None, keyw ...@@ -312,7 +312,7 @@ paddle.fluid.transpiler.HashName.reset ArgSpec(args=['self'], varargs=None, keyw
paddle.fluid.transpiler.RoundRobin.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None) paddle.fluid.transpiler.RoundRobin.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.RoundRobin.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None) paddle.fluid.transpiler.RoundRobin.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.RoundRobin.reset ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.transpiler.RoundRobin.reset ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspilerConfig.__init__ paddle.fluid.transpiler.DistributeTranspilerConfig.__init__
paddle.fluid.nets.simple_img_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'pool_size', 'pool_stride', 'pool_padding', 'pool_type', 'global_pooling', 'conv_stride', 'conv_padding', 'conv_dilation', 'conv_groups', 'param_attr', 'bias_attr', 'act', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, 'max', False, 1, 0, 1, 1, None, None, None, True)) paddle.fluid.nets.simple_img_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'pool_size', 'pool_stride', 'pool_padding', 'pool_type', 'global_pooling', 'conv_stride', 'conv_padding', 'conv_dilation', 'conv_groups', 'param_attr', 'bias_attr', 'act', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, 'max', False, 1, 0, 1, 1, None, None, None, True))
paddle.fluid.nets.sequence_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max')) paddle.fluid.nets.sequence_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max'))
paddle.fluid.nets.glu ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,)) paddle.fluid.nets.glu ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,))
...@@ -380,4 +380,4 @@ paddle.fluid.Scope.__init__ __init__(self: paddle.fluid.core.Scope) -> None ...@@ -380,4 +380,4 @@ paddle.fluid.Scope.__init__ __init__(self: paddle.fluid.core.Scope) -> None
paddle.fluid.Scope.drop_kids drop_kids(self: paddle.fluid.core.Scope) -> None paddle.fluid.Scope.drop_kids drop_kids(self: paddle.fluid.core.Scope) -> None
paddle.fluid.Scope.find_var find_var(self: paddle.fluid.core.Scope, arg0: unicode) -> paddle.fluid.core.Variable paddle.fluid.Scope.find_var find_var(self: paddle.fluid.core.Scope, arg0: unicode) -> paddle.fluid.core.Variable
paddle.fluid.Scope.new_scope new_scope(self: paddle.fluid.core.Scope) -> paddle.fluid.core.Scope paddle.fluid.Scope.new_scope new_scope(self: paddle.fluid.core.Scope) -> paddle.fluid.core.Scope
paddle.fluid.Scope.var var(self: paddle.fluid.core.Scope, arg0: unicode) -> paddle.fluid.core.Variable paddle.fluid.Scope.var var(self: paddle.fluid.core.Scope, arg0: unicode) -> paddle.fluid.core.Variable
\ No newline at end of file
...@@ -359,6 +359,11 @@ def dynamic_lstm(input, ...@@ -359,6 +359,11 @@ def dynamic_lstm(input,
W_{fh}, W_{oh}`} W_{fh}, W_{oh}`}
- The shape is (D x 4D), where D is the hidden - The shape is (D x 4D), where D is the hidden
size. size.
If it is set to None or one attribute of ParamAttr,
dynamic_lstm will create ParamAttr as param_attr.
If the Initializer of the param_attr is not set, the
parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr|None): The bias attribute for the learnable bias bias_attr (ParamAttr|None): The bias attribute for the learnable bias
weights, which contains two parts, input-hidden weights, which contains two parts, input-hidden
bias weights and peephole connections weights if bias weights and peephole connections weights if
...@@ -371,6 +376,11 @@ def dynamic_lstm(input, ...@@ -371,6 +376,11 @@ def dynamic_lstm(input,
- Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \ - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
W_{fc}, W_{oc}`}. W_{fc}, W_{oc}`}.
- The shape is (1 x 7D). - The shape is (1 x 7D).
If it is set to None or one attribute of ParamAttr,
dynamic_lstm will create ParamAttr as bias_attr.
If the Initializer of the bias_attr is not set,
the bias is initialized zero. Default: None.
use_peepholes (bool): ${use_peepholes_comment} use_peepholes (bool): ${use_peepholes_comment}
is_reverse (bool): ${is_reverse_comment} is_reverse (bool): ${is_reverse_comment}
gate_activation (str): ${gate_activation_comment} gate_activation (str): ${gate_activation_comment}
...@@ -393,7 +403,7 @@ def dynamic_lstm(input, ...@@ -393,7 +403,7 @@ def dynamic_lstm(input,
forward, _ = fluid.layers.dynamic_lstm( forward, _ = fluid.layers.dynamic_lstm(
input=forward_proj, size=hidden_dim * 4, use_peepholes=False) input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
""" """
assert bias_attr is not False, "bias_attr should not be False in dynamic_lstmp."
helper = LayerHelper('lstm', **locals()) helper = LayerHelper('lstm', **locals())
size = size // 4 size = size // 4
weight = helper.create_parameter( weight = helper.create_parameter(
...@@ -528,6 +538,11 @@ def dynamic_lstmp(input, ...@@ -528,6 +538,11 @@ def dynamic_lstmp(input,
size. size.
- Projection weight = {:math:`W_{rh}`}. - Projection weight = {:math:`W_{rh}`}.
- The shape of projection weight is (D x P). - The shape of projection weight is (D x P).
If it is set to None or one attribute of ParamAttr,
dynamic_lstm will create ParamAttr as param_attr.
If the Initializer of the param_attr is not set, the
parameter is initialized with Xavier. Default: None.
bias_attr(ParamAttr|None): The bias attribute for the learnable bias bias_attr(ParamAttr|None): The bias attribute for the learnable bias
weights, which contains two parts, input-hidden weights, which contains two parts, input-hidden
bias weights and peephole connections weights if bias weights and peephole connections weights if
...@@ -540,6 +555,11 @@ def dynamic_lstmp(input, ...@@ -540,6 +555,11 @@ def dynamic_lstmp(input,
- Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \ - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
W_{fc}, W_{oc}`}. W_{fc}, W_{oc}`}.
- The shape is (1 x 7D). - The shape is (1 x 7D).
If it is set to None or one attribute of ParamAttr,
dynamic_lstm will create ParamAttr as bias_attr.
If the Initializer of the bias_attr is not set,
the bias is initialized zero. Default: None.
use_peepholes(bool): Whether to enable diagonal/peephole connections, use_peepholes(bool): Whether to enable diagonal/peephole connections,
default `True`. default `True`.
is_reverse(bool): Whether to compute reversed LSTM, default `False`. is_reverse(bool): Whether to compute reversed LSTM, default `False`.
...@@ -584,6 +604,7 @@ def dynamic_lstmp(input, ...@@ -584,6 +604,7 @@ def dynamic_lstmp(input,
proj_activation="tanh") proj_activation="tanh")
""" """
assert bias_attr is not False, "bias_attr should not be False in dynamic_lstmp."
helper = LayerHelper('lstmp', **locals()) helper = LayerHelper('lstmp', **locals())
size = size // 4 size = size // 4
weight = helper.create_parameter( weight = helper.create_parameter(
...@@ -1283,11 +1304,12 @@ def sequence_conv(input, ...@@ -1283,11 +1304,12 @@ def sequence_conv(input,
If it is set to None or one attribute of ParamAttr, sequence_conv If it is set to None or one attribute of ParamAttr, sequence_conv
will create ParamAttr as bias_attr. If the Initializer of the bias_attr will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None. is not set, the bias is initialized zero. Default: None.
param_attr (ParamAttr): The parameter attribute for learnable parameters/weights param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
of sequence_conv. If it is set to None or one attribute of ParamAttr, sequence_conv of sequence_conv. If it is set to None or one attribute of ParamAttr, sequence_conv
will create ParamAttr as param_attr. If the Initializer of the param_attr will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None. is not set, the parameter is initialized with Xavier. Default: None.
act (str): the activation type act (str): Activation type, if it is set to None, activation is not appended.
Default: None.
name (str|None): A name for this layer(optional). If set None, the layer name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically. Default: None. will be named automatically. Default: None.
...@@ -1502,7 +1524,7 @@ def conv2d(input, ...@@ -1502,7 +1524,7 @@ def conv2d(input,
the first half of the filters is only connected to the first half the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: groups=1. connected to the second half of the input channels. Default: groups=1.
param_attr (ParamAttr): The parameter attribute for learnable parameters/weights param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
of conv2d. If it is set to None or one attribute of ParamAttr, conv2d of conv2d. If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as param_attr. If the Initializer of the param_attr will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with :math:`Normal(0.0, std)`, is not set, the parameter is initialized with :math:`Normal(0.0, std)`,
...@@ -1675,7 +1697,7 @@ def conv3d(input, ...@@ -1675,7 +1697,7 @@ def conv3d(input,
the first half of the filters is only connected to the first half the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: groups=1 connected to the second half of the input channels. Default: groups=1
param_attr (ParamAttr): The parameter attribute for learnable parameters/weights param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
of conv3d. If it is set to None or one attribute of ParamAttr, conv3d of conv3d. If it is set to None or one attribute of ParamAttr, conv3d
will create ParamAttr as param_attr. If it is set to None, the parameter will create ParamAttr as param_attr. If it is set to None, the parameter
is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is
...@@ -2137,8 +2159,14 @@ def batch_norm(input, ...@@ -2137,8 +2159,14 @@ def batch_norm(input,
is_test(bool, Default False): Used for training or training. is_test(bool, Default False): Used for training or training.
momentum(float, Default 0.9): momentum(float, Default 0.9):
epsilon(float, Default 1e-05): epsilon(float, Default 1e-05):
param_attr(ParamAttr): The parameter attribute for Parameter `scale`. param_attr(ParamAttr|None): The parameter attribute for Parameter `scale`
bias_attr(ParamAttr): The parameter attribute for Parameter `bias`. of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
bias_attr(ParamAttr|None): The parameter attribute for the bias of batch_norm.
If it is set to None or one attribute of ParamAttr, batch_norm
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
data_layout(string, default NCHW): NCHW|NHWC data_layout(string, default NCHW): NCHW|NHWC
in_place(bool, Default False): Make the input and output of batch norm reuse memory. in_place(bool, Default False): Make the input and output of batch norm reuse memory.
name(string, Default None): A name for this layer(optional). If set None, the layer name(string, Default None): A name for this layer(optional). If set None, the layer
...@@ -2158,6 +2186,7 @@ def batch_norm(input, ...@@ -2158,6 +2186,7 @@ def batch_norm(input,
hidden1 = fluid.layers.fc(input=x, size=200, param_attr='fc1.w') hidden1 = fluid.layers.fc(input=x, size=200, param_attr='fc1.w')
hidden2 = fluid.layers.batch_norm(input=hidden1) hidden2 = fluid.layers.batch_norm(input=hidden1)
""" """
assert bias_attr is not False, "bias_attr should not be False in batch_norm."
helper = LayerHelper('batch_norm', **locals()) helper = LayerHelper('batch_norm', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -2428,7 +2457,7 @@ def conv2d_transpose(input, ...@@ -2428,7 +2457,7 @@ def conv2d_transpose(input,
first half of the input channels, while the second half of the first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels. filters is only connected to the second half of the input channels.
Default: groups = 1. Default: groups = 1.
param_attr (ParamAttr): The parameter attribute for learnable parameters/weights param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
of conv2d_transpose. If it is set to None or one attribute of ParamAttr, conv2d_transpose of conv2d_transpose. If it is set to None or one attribute of ParamAttr, conv2d_transpose
will create ParamAttr as param_attr. If the Initializer of the param_attr will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None. is not set, the parameter is initialized with Xavier. Default: None.
...@@ -2457,7 +2486,7 @@ def conv2d_transpose(input, ...@@ -2457,7 +2486,7 @@ def conv2d_transpose(input,
data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32') data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
conv2d_transpose = fluid.layers.conv2d_transpose(input=data, num_filters=2, filter_size=3) conv2d_transpose = fluid.layers.conv2d_transpose(input=data, num_filters=2, filter_size=3)
""" """
assert param_attr is not False, "param_attr should not be False in conv2d_transpose."
input_channel = input.shape[1] input_channel = input.shape[1]
op_type = 'conv2d_transpose' op_type = 'conv2d_transpose'
...@@ -2616,7 +2645,7 @@ def conv3d_transpose(input, ...@@ -2616,7 +2645,7 @@ def conv3d_transpose(input,
first half of the input channels, while the second half of the first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels. filters is only connected to the second half of the input channels.
Default: groups=1 Default: groups=1
param_attr (ParamAttr): The parameter attribute for learnable parameters/weights param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
of conv3d_transpose. If it is set to None or one attribute of ParamAttr, conv3d_transpose of conv3d_transpose. If it is set to None or one attribute of ParamAttr, conv3d_transpose
will create ParamAttr as param_attr. If the Initializer of the param_attr will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None. is not set, the parameter is initialized with Xavier. Default: None.
...@@ -2645,6 +2674,7 @@ def conv3d_transpose(input, ...@@ -2645,6 +2674,7 @@ def conv3d_transpose(input,
data = fluid.layers.data(name='data', shape=[3, 12, 32, 32], dtype='float32') data = fluid.layers.data(name='data', shape=[3, 12, 32, 32], dtype='float32')
conv3d_transpose = fluid.layers.conv3d_transpose(input=data, num_filters=2, filter_size=3) conv3d_transpose = fluid.layers.conv3d_transpose(input=data, num_filters=2, filter_size=3)
""" """
assert param_attr is not False, "param_attr should not be False in conv3d_transpose."
l_type = "conv3d_transpose" l_type = "conv3d_transpose"
helper = LayerHelper(l_type, **locals()) helper = LayerHelper(l_type, **locals())
if not isinstance(input, Variable): if not isinstance(input, Variable):
...@@ -4018,7 +4048,8 @@ def nce(input, ...@@ -4018,7 +4048,8 @@ def nce(input,
sample_weight=None, sample_weight=None,
param_attr=None, param_attr=None,
bias_attr=None, bias_attr=None,
num_neg_samples=None): num_neg_samples=None,
name=None):
""" """
${comment} ${comment}
...@@ -4029,9 +4060,18 @@ def nce(input, ...@@ -4029,9 +4060,18 @@ def nce(input,
sample_weight (Variable|None): A Variable of shape [batch_size, 1] sample_weight (Variable|None): A Variable of shape [batch_size, 1]
storing a weight for each sample. The default weight for each storing a weight for each sample. The default weight for each
sample is 1.0. sample is 1.0.
param_attr (ParamAttr|None): attributes for parameter param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
bias_attr (ParamAttr|None): attributes for bias of nce. If it is set to None or one attribute of ParamAttr, nce
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of nce.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, nce
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
num_neg_samples (int): ${num_neg_samples_comment} num_neg_samples (int): ${num_neg_samples_comment}
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically. Default: None.
Returns: Returns:
Variable: The output nce loss. Variable: The output nce loss.
...@@ -4064,19 +4104,28 @@ def nce(input, ...@@ -4064,19 +4104,28 @@ def nce(input,
""" """
helper = LayerHelper('nce', **locals()) helper = LayerHelper('nce', **locals())
assert isinstance(input, Variable) assert isinstance(input, Variable)
dim = input.shape[1]
assert isinstance(label, Variable) assert isinstance(label, Variable)
dim = input.shape[1]
num_true_class = label.shape[1] num_true_class = label.shape[1]
w = helper.create_parameter( w = helper.create_parameter(
attr=helper.param_attr, attr=helper.param_attr,
shape=[num_total_classes, dim], shape=[num_total_classes, dim],
is_bias=False, is_bias=False,
dtype=input.dtype) dtype=input.dtype)
b = helper.create_parameter( inputs = {
attr=helper.bias_attr, 'Input': input,
shape=[num_total_classes, 1], 'Label': label,
is_bias=True, 'Weight': w,
dtype=input.dtype) 'SampleWeight': sample_weight if sample_weight is not None else []
}
if helper.bias_attr:
b = helper.create_parameter(
attr=helper.bias_attr,
shape=[num_total_classes, 1],
is_bias=True,
dtype=input.dtype)
inputs['Bias'] = b
cost = helper.create_tmp_variable(dtype=input.dtype) cost = helper.create_tmp_variable(dtype=input.dtype)
sample_logits = helper.create_tmp_variable(dtype=input.dtype) sample_logits = helper.create_tmp_variable(dtype=input.dtype)
sample_labels = helper.create_tmp_variable(dtype=label.dtype) sample_labels = helper.create_tmp_variable(dtype=label.dtype)
...@@ -4093,13 +4142,7 @@ def nce(input, ...@@ -4093,13 +4142,7 @@ def nce(input,
helper.append_op( helper.append_op(
type='nce', type='nce',
inputs={ inputs=inputs,
'Input': input,
'Label': label,
'Weight': w,
'Bias': b,
'SampleWeight': sample_weight if sample_weight is not None else []
},
outputs={ outputs={
'Cost': cost, 'Cost': cost,
'SampleLogits': sample_logits, 'SampleLogits': sample_logits,
...@@ -4109,7 +4152,12 @@ def nce(input, ...@@ -4109,7 +4152,12 @@ def nce(input,
return cost / (num_neg_samples + 1) return cost / (num_neg_samples + 1)
def hsigmoid(input, label, num_classes, param_attr=None, bias_attr=None): def hsigmoid(input,
label,
num_classes,
param_attr=None,
bias_attr=None,
name=None):
""" """
The hierarchical sigmoid operator is used to accelerate the training The hierarchical sigmoid operator is used to accelerate the training
process of language model. This operator organizes the classes into a process of language model. This operator organizes the classes into a
...@@ -4130,11 +4178,17 @@ def hsigmoid(input, label, num_classes, param_attr=None, bias_attr=None): ...@@ -4130,11 +4178,17 @@ def hsigmoid(input, label, num_classes, param_attr=None, bias_attr=None):
label (Variable): The tensor variable contains labels of training data. label (Variable): The tensor variable contains labels of training data.
It's a tensor with shape is :math:`[N \\times 1]`. It's a tensor with shape is :math:`[N \\times 1]`.
num_classes: (int), The number of classes, must not be less than 2. num_classes: (int), The number of classes, must not be less than 2.
param_attr (ParamAttr|list of ParamAttr, default None): The parameter param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
attribute for learnable parameters/weights of this layer. of hsigmoid. If it is set to None or one attribute of ParamAttr, hsigmoid
bias_attr (ParamAttr|list of ParamAttr, default None): The parameter will create ParamAttr as param_attr. If the Initializer of the param_attr
attribute for the bias of this layer. If it is set to False, no is not set, the parameter is initialized with Xavier. Default: None.
bias will be applied. bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of hsigmoid.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, hsigmoid
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically. Default: None.
Returns: Returns:
Out: (Tensor) The cost of hierarchical sigmoid operator. the shape is [N, 1] Out: (Tensor) The cost of hierarchical sigmoid operator. the shape is [N, 1]
......
...@@ -64,21 +64,21 @@ def simple_img_conv_pool(input, ...@@ -64,21 +64,21 @@ def simple_img_conv_pool(input,
average-pooling. Default :math:`max`. average-pooling. Default :math:`max`.
global_pooling (bool): Whether to use the global pooling. If global_pooling = true, global_pooling (bool): Whether to use the global pooling. If global_pooling = true,
pool_size and pool_padding while be ignored. Default False pool_size and pool_padding while be ignored. Default False
conv_stride (int|list|tuple): The stride size of the Conv2d Layer. If stride is a conv_stride (int|list|tuple): The stride size of the conv2d Layer. If stride is a
list or tuple, it must contain two integers, (conv_stride_H, conv_stride_W). Otherwise, list or tuple, it must contain two integers, (conv_stride_H, conv_stride_W). Otherwise,
the conv_stride_H = conv_stride_W = conv_stride. Default: conv_stride = 1. the conv_stride_H = conv_stride_W = conv_stride. Default: conv_stride = 1.
conv_padding (int|list|tuple): The padding size of the Conv2d Layer. If padding is conv_padding (int|list|tuple): The padding size of the conv2d Layer. If padding is
a list or tuple, it must contain two integers, (conv_padding_H, conv_padding_W). a list or tuple, it must contain two integers, (conv_padding_H, conv_padding_W).
Otherwise, the conv_padding_H = conv_padding_W = conv_padding. Default: conv_padding = 0. Otherwise, the conv_padding_H = conv_padding_W = conv_padding. Default: conv_padding = 0.
conv_dilation (int|list|tuple): The dilation size of the Conv2d Layer. If dilation is conv_dilation (int|list|tuple): The dilation size of the conv2d Layer. If dilation is
a list or tuple, it must contain two integers, (conv_dilation_H, conv_dilation_W). a list or tuple, it must contain two integers, (conv_dilation_H, conv_dilation_W).
Otherwise, the conv_dilation_H = conv_dilation_W = conv_dilation. Default: conv_dilation = 1. Otherwise, the conv_dilation_H = conv_dilation_W = conv_dilation. Default: conv_dilation = 1.
conv_groups (int): The groups number of the Conv2d Layer. According to grouped conv_groups (int): The groups number of the conv2d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2, convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: groups=1. connected to the second half of the input channels. Default: groups=1.
param_attr (ParamAttr): The parameter attribute for learnable parameters/weights param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
of conv2d. If it is set to None or one attribute of ParamAttr, conv2d of conv2d. If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as param_attr. If the Initializer of the param_attr will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with :math:`Normal(0.0, std)`, is not set, the parameter is initialized with :math:`Normal(0.0, std)`,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册