提交 0b0d3d03 编写于 作者: C Cao Ying 提交者: GitHub

Merge pull request #2134 from lcy-seso/add_param_attr_to_nce

add param_attr to nce_layer and enable multiple inputs.
......@@ -4921,12 +4921,14 @@ def crf_decoding_layer(input,
@wrap_act_default(act=SigmoidActivation())
@wrap_bias_attr_default(has_bias=True)
@wrap_param_attr_default()
@wrap_name_default()
@layer_support()
def nce_layer(input,
label,
num_classes,
num_classes=None,
act=None,
param_attr=None,
weight=None,
num_neg_samples=10,
neg_distribution=None,
......@@ -4942,7 +4944,8 @@ def nce_layer(input,
.. code-block:: python
cost = nce_layer(input=layer1, label=layer2, weight=layer3,
cost = nce_layer(input=[layer1, layer2], label=layer2,
param_attr=[attr1, attr2], weight=layer3,
num_classes=3, neg_distribution=[0.1,0.3,0.6])
:param name: layer name
......@@ -4957,6 +4960,8 @@ def nce_layer(input,
:type num_classes: int
:param act: Activation, default is Sigmoid.
:type act: BaseActivation
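:param param_attr: The parameter attribute(s) for the input weights. Either a single ParameterAttribute, which is copied for every input, or a list containing one ParameterAttribute per input.
:type param_attr: ParameterAttribute|list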
:param num_neg_samples: number of negative samples. Default is 10.
:type num_neg_samples: int
:param neg_distribution: The distribution for generating the random negative labels.
......@@ -4972,9 +4977,20 @@ def nce_layer(input,
"""
if isinstance(input, LayerOutput):
input = [input]
assert not isinstance(param_attr, collections.Sequence)
param_attr = [param_attr]
else:
if isinstance(param_attr, collections.Sequence):
assert len(input) == len(param_attr)
else:
param_attr = [copy.deepcopy(param_attr) for _ in range(len(input))]
assert isinstance(input, collections.Sequence)
assert isinstance(label, LayerOutput)
assert label.layer_type == LayerType.DATA
if num_classes is None:
num_classes = label.size
if neg_distribution is not None:
assert isinstance(neg_distribution, collections.Sequence)
assert len(neg_distribution) == num_classes
......@@ -4984,9 +5000,9 @@ def nce_layer(input,
ipts_for_layer = []
parents = []
for each_input in input:
for each_input, attr in zip(input, param_attr):
assert isinstance(each_input, LayerOutput)
ipts_for_layer.append(each_input.name)
ipts_for_layer.append(Input(each_input.name, **attr.attr))
parents.append(each_input)
ipts_for_layer.append(label.name)
parents.append(label)
......
......@@ -215,6 +215,22 @@ layers {
}
coeff: 1.0
}
layers {
name: "__nce_layer_0__"
type: "nce"
size: 1
active_type: "sigmoid"
inputs {
input_layer_name: "__fc_layer_0__"
input_parameter_name: "___nce_layer_0__.w0"
}
inputs {
input_layer_name: "labels"
}
bias_parameter_name: "___nce_layer_0__.wbias"
num_classes: 5000
num_neg_samples: 10
}
parameters {
name: "___fc_layer_0__.w0"
size: 800
......@@ -245,6 +261,26 @@ parameters {
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___nce_layer_0__.w0"
size: 20000
initial_mean: 0.0
initial_std: 0.0141421356237
dims: 5000
dims: 4
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___nce_layer_0__.wbias"
size: 5000
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 5000
initial_strategy: 0
initial_smart: false
}
input_layer_names: "input"
input_layer_names: "labels"
input_layer_names: "crf_label"
......@@ -267,6 +303,7 @@ output_layer_names: "__cross_entropy_with_selfnorm_0__"
output_layer_names: "__huber_cost_0__"
output_layer_names: "__multi_binary_label_cross_entropy_0__"
output_layer_names: "__sum_cost_0__"
output_layer_names: "__nce_layer_0__"
sub_models {
name: "root"
layer_names: "input"
......@@ -292,6 +329,7 @@ sub_models {
layer_names: "__huber_cost_0__"
layer_names: "__multi_binary_label_cross_entropy_0__"
layer_names: "__sum_cost_0__"
layer_names: "__nce_layer_0__"
input_layer_names: "input"
input_layer_names: "labels"
input_layer_names: "crf_label"
......@@ -314,6 +352,7 @@ sub_models {
output_layer_names: "__huber_cost_0__"
output_layer_names: "__multi_binary_label_cross_entropy_0__"
output_layer_names: "__sum_cost_0__"
output_layer_names: "__nce_layer_0__"
is_recurrent_layer_group: false
}
......@@ -60,6 +60,31 @@ layers {
}
coeff: 1.0
}
layers {
name: "multi_class_label"
type: "data"
size: 500
active_type: ""
}
layers {
name: "__nce_layer_0__"
type: "nce"
size: 1
active_type: "sigmoid"
inputs {
input_layer_name: "__fc_layer_0__"
input_parameter_name: "___nce_layer_0__.w0"
}
inputs {
input_layer_name: "multi_class_label"
}
inputs {
input_layer_name: "weight"
}
bias_parameter_name: "___nce_layer_0__.wbias"
num_classes: 500
num_neg_samples: 10
}
parameters {
name: "___fc_layer_0__.w0"
size: 3000
......@@ -80,9 +105,30 @@ parameters {
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___nce_layer_0__.w0"
size: 5000
initial_mean: 0.0
initial_std: 0.04472135955
dims: 500
dims: 10
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___nce_layer_0__.wbias"
size: 500
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 500
initial_strategy: 0
initial_smart: false
}
input_layer_names: "input"
input_layer_names: "label"
input_layer_names: "weight"
input_layer_names: "multi_class_label"
output_layer_names: "__cost_0__"
output_layer_names: "__mse_cost_0__"
evaluators {
......@@ -100,9 +146,12 @@ sub_models {
layer_names: "__fc_layer_0__"
layer_names: "__cost_0__"
layer_names: "__mse_cost_0__"
layer_names: "multi_class_label"
layer_names: "__nce_layer_0__"
input_layer_names: "input"
input_layer_names: "label"
input_layer_names: "weight"
input_layer_names: "multi_class_label"
output_layer_names: "__cost_0__"
output_layer_names: "__mse_cost_0__"
evaluator_names: "classification_error_evaluator"
......
......@@ -40,4 +40,6 @@ outputs(
name='huber_label', size=1)),
multi_binary_label_cross_entropy(
input=probs, label=xe_label),
sum_cost(input=hidden))
sum_cost(input=hidden),
nce_layer(
input=hidden, label=labels))
......@@ -11,4 +11,9 @@ outputs(
classification_cost(
input=fc, label=lbl, weight=wt),
mse_cost(
input=fc, label=lbl, weight=wt))
input=fc, label=lbl, weight=wt),
nce_layer(
input=fc,
label=data_layer(
name='multi_class_label', size=500),
weight=wt))
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册