提交 0b0d3d03 编写于 作者: C Cao Ying 提交者: GitHub

Merge pull request #2134 from lcy-seso/add_param_attr_to_nce

add param_attr to nce_layer and enable multiple inputs.
...@@ -4921,12 +4921,14 @@ def crf_decoding_layer(input, ...@@ -4921,12 +4921,14 @@ def crf_decoding_layer(input,
@wrap_act_default(act=SigmoidActivation()) @wrap_act_default(act=SigmoidActivation())
@wrap_bias_attr_default(has_bias=True) @wrap_bias_attr_default(has_bias=True)
@wrap_param_attr_default()
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def nce_layer(input, def nce_layer(input,
label, label,
num_classes, num_classes=None,
act=None, act=None,
param_attr=None,
weight=None, weight=None,
num_neg_samples=10, num_neg_samples=10,
neg_distribution=None, neg_distribution=None,
...@@ -4942,7 +4944,8 @@ def nce_layer(input, ...@@ -4942,7 +4944,8 @@ def nce_layer(input,
.. code-block:: python .. code-block:: python
cost = nce_layer(input=layer1, label=layer2, weight=layer3, cost = nce_layer(input=[layer1, layer2], label=layer2,
param_attr=[attr1, attr2], weight=layer3,
num_classes=3, neg_distribution=[0.1,0.3,0.6]) num_classes=3, neg_distribution=[0.1,0.3,0.6])
:param name: layer name :param name: layer name
...@@ -4957,6 +4960,8 @@ def nce_layer(input, ...@@ -4957,6 +4960,8 @@ def nce_layer(input,
:type num_classes: int :type num_classes: int
:param act: Activation, default is Sigmoid. :param act: Activation, default is Sigmoid.
:type act: BaseActivation :type act: BaseActivation
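:param param_attr: The parameter attribute(s) for the input weights. Either a single ParameterAttribute, which is copied for every input, or a list with exactly one ParameterAttribute per input. A list is not accepted when input is a single LayerOutput.
:type param_attr: ParameterAttribute|list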
:param num_neg_samples: number of negative samples. Default is 10. :param num_neg_samples: number of negative samples. Default is 10.
:type num_neg_samples: int :type num_neg_samples: int
:param neg_distribution: The distribution for generating the random negative labels. :param neg_distribution: The distribution for generating the random negative labels.
...@@ -4972,9 +4977,20 @@ def nce_layer(input, ...@@ -4972,9 +4977,20 @@ def nce_layer(input,
""" """
if isinstance(input, LayerOutput): if isinstance(input, LayerOutput):
input = [input] input = [input]
assert not isinstance(param_attr, collections.Sequence)
param_attr = [param_attr]
else:
if isinstance(param_attr, collections.Sequence):
assert len(input) == len(param_attr)
else:
param_attr = [copy.deepcopy(param_attr) for _ in range(len(input))]
assert isinstance(input, collections.Sequence) assert isinstance(input, collections.Sequence)
assert isinstance(label, LayerOutput) assert isinstance(label, LayerOutput)
assert label.layer_type == LayerType.DATA assert label.layer_type == LayerType.DATA
if num_classes is None:
num_classes = label.size
if neg_distribution is not None: if neg_distribution is not None:
assert isinstance(neg_distribution, collections.Sequence) assert isinstance(neg_distribution, collections.Sequence)
assert len(neg_distribution) == num_classes assert len(neg_distribution) == num_classes
...@@ -4984,9 +5000,9 @@ def nce_layer(input, ...@@ -4984,9 +5000,9 @@ def nce_layer(input,
ipts_for_layer = [] ipts_for_layer = []
parents = [] parents = []
for each_input in input: for each_input, attr in zip(input, param_attr):
assert isinstance(each_input, LayerOutput) assert isinstance(each_input, LayerOutput)
ipts_for_layer.append(each_input.name) ipts_for_layer.append(Input(each_input.name, **attr.attr))
parents.append(each_input) parents.append(each_input)
ipts_for_layer.append(label.name) ipts_for_layer.append(label.name)
parents.append(label) parents.append(label)
......
...@@ -215,6 +215,22 @@ layers { ...@@ -215,6 +215,22 @@ layers {
} }
coeff: 1.0 coeff: 1.0
} }
layers {
name: "__nce_layer_0__"
type: "nce"
size: 1
active_type: "sigmoid"
inputs {
input_layer_name: "__fc_layer_0__"
input_parameter_name: "___nce_layer_0__.w0"
}
inputs {
input_layer_name: "labels"
}
bias_parameter_name: "___nce_layer_0__.wbias"
num_classes: 5000
num_neg_samples: 10
}
parameters { parameters {
name: "___fc_layer_0__.w0" name: "___fc_layer_0__.w0"
size: 800 size: 800
...@@ -245,6 +261,26 @@ parameters { ...@@ -245,6 +261,26 @@ parameters {
initial_strategy: 0 initial_strategy: 0
initial_smart: true initial_smart: true
} }
parameters {
name: "___nce_layer_0__.w0"
size: 20000
initial_mean: 0.0
initial_std: 0.0141421356237
dims: 5000
dims: 4
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___nce_layer_0__.wbias"
size: 5000
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 5000
initial_strategy: 0
initial_smart: false
}
input_layer_names: "input" input_layer_names: "input"
input_layer_names: "labels" input_layer_names: "labels"
input_layer_names: "crf_label" input_layer_names: "crf_label"
...@@ -267,6 +303,7 @@ output_layer_names: "__cross_entropy_with_selfnorm_0__" ...@@ -267,6 +303,7 @@ output_layer_names: "__cross_entropy_with_selfnorm_0__"
output_layer_names: "__huber_cost_0__" output_layer_names: "__huber_cost_0__"
output_layer_names: "__multi_binary_label_cross_entropy_0__" output_layer_names: "__multi_binary_label_cross_entropy_0__"
output_layer_names: "__sum_cost_0__" output_layer_names: "__sum_cost_0__"
output_layer_names: "__nce_layer_0__"
sub_models { sub_models {
name: "root" name: "root"
layer_names: "input" layer_names: "input"
...@@ -292,6 +329,7 @@ sub_models { ...@@ -292,6 +329,7 @@ sub_models {
layer_names: "__huber_cost_0__" layer_names: "__huber_cost_0__"
layer_names: "__multi_binary_label_cross_entropy_0__" layer_names: "__multi_binary_label_cross_entropy_0__"
layer_names: "__sum_cost_0__" layer_names: "__sum_cost_0__"
layer_names: "__nce_layer_0__"
input_layer_names: "input" input_layer_names: "input"
input_layer_names: "labels" input_layer_names: "labels"
input_layer_names: "crf_label" input_layer_names: "crf_label"
...@@ -314,6 +352,7 @@ sub_models { ...@@ -314,6 +352,7 @@ sub_models {
output_layer_names: "__huber_cost_0__" output_layer_names: "__huber_cost_0__"
output_layer_names: "__multi_binary_label_cross_entropy_0__" output_layer_names: "__multi_binary_label_cross_entropy_0__"
output_layer_names: "__sum_cost_0__" output_layer_names: "__sum_cost_0__"
output_layer_names: "__nce_layer_0__"
is_recurrent_layer_group: false is_recurrent_layer_group: false
} }
...@@ -60,6 +60,31 @@ layers { ...@@ -60,6 +60,31 @@ layers {
} }
coeff: 1.0 coeff: 1.0
} }
layers {
name: "multi_class_label"
type: "data"
size: 500
active_type: ""
}
layers {
name: "__nce_layer_0__"
type: "nce"
size: 1
active_type: "sigmoid"
inputs {
input_layer_name: "__fc_layer_0__"
input_parameter_name: "___nce_layer_0__.w0"
}
inputs {
input_layer_name: "multi_class_label"
}
inputs {
input_layer_name: "weight"
}
bias_parameter_name: "___nce_layer_0__.wbias"
num_classes: 500
num_neg_samples: 10
}
parameters { parameters {
name: "___fc_layer_0__.w0" name: "___fc_layer_0__.w0"
size: 3000 size: 3000
...@@ -80,9 +105,30 @@ parameters { ...@@ -80,9 +105,30 @@ parameters {
initial_strategy: 0 initial_strategy: 0
initial_smart: false initial_smart: false
} }
parameters {
name: "___nce_layer_0__.w0"
size: 5000
initial_mean: 0.0
initial_std: 0.04472135955
dims: 500
dims: 10
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___nce_layer_0__.wbias"
size: 500
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 500
initial_strategy: 0
initial_smart: false
}
input_layer_names: "input" input_layer_names: "input"
input_layer_names: "label" input_layer_names: "label"
input_layer_names: "weight" input_layer_names: "weight"
input_layer_names: "multi_class_label"
output_layer_names: "__cost_0__" output_layer_names: "__cost_0__"
output_layer_names: "__mse_cost_0__" output_layer_names: "__mse_cost_0__"
evaluators { evaluators {
...@@ -100,9 +146,12 @@ sub_models { ...@@ -100,9 +146,12 @@ sub_models {
layer_names: "__fc_layer_0__" layer_names: "__fc_layer_0__"
layer_names: "__cost_0__" layer_names: "__cost_0__"
layer_names: "__mse_cost_0__" layer_names: "__mse_cost_0__"
layer_names: "multi_class_label"
layer_names: "__nce_layer_0__"
input_layer_names: "input" input_layer_names: "input"
input_layer_names: "label" input_layer_names: "label"
input_layer_names: "weight" input_layer_names: "weight"
input_layer_names: "multi_class_label"
output_layer_names: "__cost_0__" output_layer_names: "__cost_0__"
output_layer_names: "__mse_cost_0__" output_layer_names: "__mse_cost_0__"
evaluator_names: "classification_error_evaluator" evaluator_names: "classification_error_evaluator"
......
...@@ -40,4 +40,6 @@ outputs( ...@@ -40,4 +40,6 @@ outputs(
name='huber_label', size=1)), name='huber_label', size=1)),
multi_binary_label_cross_entropy( multi_binary_label_cross_entropy(
input=probs, label=xe_label), input=probs, label=xe_label),
sum_cost(input=hidden)) sum_cost(input=hidden),
nce_layer(
input=hidden, label=labels))
...@@ -11,4 +11,9 @@ outputs( ...@@ -11,4 +11,9 @@ outputs(
classification_cost( classification_cost(
input=fc, label=lbl, weight=wt), input=fc, label=lbl, weight=wt),
mse_cost( mse_cost(
input=fc, label=lbl, weight=wt)) input=fc, label=lbl, weight=wt),
nce_layer(
input=fc,
label=data_layer(
name='multi_class_label', size=500),
weight=wt))
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册