Commit 159dd833 authored by Haichao-Zhang, committed by emailweixu

split dotmul_projection and dotmul_operator (#87)

* split dotmul_projection and dotmul_operator (usage sketch below)
* bug fix in output size checking for mixed layer
Parent 90b9cba7
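The change splits the old dual-purpose dotmul_projection into two explicit APIs: dotmul_projection keeps the single-layer form with a learned per-element weight, and the new dotmul_operator takes exactly two layers. A minimal usage sketch of the resulting API; the import and the data_layer definitions for x1 and y1 are assumed for illustration (only their use in the test config below comes from the patch):

    # Sketch of a trainer config after this commit (assumed setup).
    from paddle.trainer_config_helpers import *

    x1 = data_layer(name="x1", size=100)   # sizes are illustrative
    y1 = data_layer(name="y1", size=100)

    # Projection: one input, element-wise product with a learned weight.
    proj = dotmul_projection(input=y1)

    # Operator: two inputs, element-wise product, no parameters.
    op = dotmul_operator(x=x1, y=y1, scale=1.0)

    # Both can feed a mixed_layer; output size is inferred from inputs.
    out = mixed_layer(input=[op, proj])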
@@ -2430,7 +2430,6 @@ class MixedLayer(LayerBase):
         config_assert(inputs, 'inputs cannot be empty')
         super(MixedLayer, self).__init__(
             name, 'mixed', size, inputs=inputs, **xargs)
         operator_input_index = []
         for operator in self.operators:
             operator_conf = operator.operator_conf
@@ -2445,21 +2444,31 @@ class MixedLayer(LayerBase):
                 input_layer = self.get_input_layer(input_index)
                 operator_conf.input_sizes.append(input_layer.size)
                 operator_input_index.append(input_index)
             if self.config.size == 0:
                 size = operator.calc_output_size(operator_conf.input_sizes)
                 if size != 0:
                     self.set_layer_size(size)
+            else:
+                size = operator.calc_output_size(operator_conf.input_sizes)
+                if size != 0:
+                    config_assert(size == self.config.size,
+                                  "different inputs have different size: %s vs. %s" %
+                                  (size, self.config.size))
 
         for input_index in xrange(len(self.inputs)):
             input_layer = self.get_input_layer(input_index)
             input = self.inputs[input_index]
             if input_index not in operator_input_index:
                 config_assert(isinstance(input, Projection), "input should be projection or operation")
             if self.config.size == 0 and isinstance(input, Projection):
                 size = input.calc_output_size(input_layer)
                 if size != 0:
                     self.set_layer_size(size)
+            elif isinstance(input, Projection):
+                sz = input.calc_output_size(input_layer)
+                if sz != 0:
+                    config_assert(sz == self.config.size,
+                                  "different inputs have different size: %s vs. %s" %
+                                  (sz, self.config.size))
         config_assert(size != 0, "size is not set")
         for input_index in xrange(len(self.inputs)):
...
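The two new else branches turn silent size mismatches into configuration errors: the first input that reports a nonzero size fixes the layer size, and every later projection or operator must agree. A standalone sketch of that rule, in plain Python rather than the config parser itself:

    def check_mixed_sizes(input_sizes):
        """Illustrative only: mimics MixedLayer's new size check.

        input_sizes: output sizes reported by each projection/operator;
        0 means "unknown, cannot infer from this input".
        """
        layer_size = 0  # corresponds to self.config.size == 0 (unset)
        for size in input_sizes:
            if size == 0:
                continue  # this input cannot determine the size
            if layer_size == 0:
                layer_size = size  # first known size wins
            elif size != layer_size:
                raise ValueError(
                    "different inputs have different size: %s vs. %s"
                    % (size, layer_size))
        assert layer_size != 0, "size is not set"
        return layer_size

    # check_mixed_sizes([0, 128, 128]) -> 128
    # check_mixed_sizes([128, 64])     -> raises ValueError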
@@ -28,7 +28,7 @@ except ImportError:
 import copy
 
 __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
-           "identity_projection", "dotmul_projection",
+           "identity_projection", "dotmul_projection", "dotmul_operator",
            "table_projection", "mixed_layer", "data_layer",
            "embedding_layer", "fc_layer", "grumemory",
            "pooling_layer", "lstmemory", "last_seq", "first_seq",
@@ -389,7 +389,7 @@ def identity_projection(input, offset=None):
 @wrap_param_attr_default()
 def dotmul_projection(input, param_attr=None, scale=1):
     """
-    1. DotMulProjection if input is a layer.
+    DotMulProjection with a layer as input.
     It performs element-wise multiplication with weight.
 
     .. math::
@@ -403,48 +403,45 @@ def dotmul_projection(input, param_attr=None, scale=1):
        proj = dotmul_projection(input=layer)
 
-    2. DotMulOperator if input is a list or tuple.
-       It takes two inputs, performs element-wise multiplication:
-
-    .. math::
-       out.row[i] += scale * (in1.row[i] .* in2.row[i])
-
-    where :math:`.*` means element-wise multiplication, and
-    scale is a config scalar, its default value is one.
-
-    The example usage is:
-
-    .. code-block:: python
-
-       op = dotmul_projection(input=[layer1, layer2],
-                              scale=2.0)
-
     :param input: Input layer.
-    :type input: LayerOutput|list|tuple
+    :type input: LayerOutput
     :param param_attr: Parameter config, None if use default.
     :type param_attr: ParameterAttribute
     :param scale: config scalar, default value is one.
     :type scale: float
-    :return: A DotMulProjection or DotMulOperator Object.
-    :rtype: DotMulProjection or DotMulOperator
+    :return: A DotMulProjection Object.
+    :rtype: DotMulProjection
     """
-    if isinstance(input, LayerOutput):
-        proj = DotMulProjection(input_layer_name=input.name,
-                                size=input.size,
-                                **param_attr.attr)
-        proj.origin = input
-        proj.origin.projection = "dot_mul"
-        return proj
-    else:
-        assert isinstance(input, list) or isinstance(input, tuple)
-        assert len(input) == 2
-        assert param_attr is None
-        op = DotMulOperator(input_layer_name=[x.name for x in input],
-                            scale=scale)
-        op.origin = input
-        op.origin.operator = "dot_mul"
-        return op
+    proj = DotMulProjection(input_layer_name=input.name,
+                            size=input.size,
+                            **param_attr.attr)
+    proj.origin = input
+    return proj
+
+
+def dotmul_operator(x, y, scale=1):
+    """
+    DotMulOperator takes two inputs and performs element-wise multiplication:
+
+    .. math::
+       out.row[i] += scale * (in1.row[i] .* in2.row[i])
+
+    where :math:`.*` means element-wise multiplication, and
+    scale is a config scalar, its default value is one.
+
+    The example usage is:
+
+    .. code-block:: python
+
+       op = dotmul_operator(x, y, scale=1)
+
+    :param x: Input layer.
+    :type x: LayerOutput
+    :param y: Input layer.
+    :type y: LayerOutput
+    :param scale: config scalar, default value is one.
+    :type scale: float
+    :return: A DotMulOperator Object.
+    :rtype: DotMulOperator
+    """
+    assert isinstance(x, LayerOutput)
+    assert isinstance(y, LayerOutput)
+    op = DotMulOperator(input_layer_names=[x.name, y.name],
+                        scale=scale)
+    op.origin = [x, y]
+    return op
 
 @wrap_bias_attr_default(['padding_attr'])
 def context_projection(input, context_len, context_start=None,
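For reference, the element-wise semantics both functions document — out.row[i] += scale * (in1.row[i] .* in2.row[i]) for the operator, a learned weight vector for the projection — can be spelled out in a few lines of NumPy. This illustrates the math only; it is not PaddlePaddle code:

    import numpy as np

    scale = 2.0
    batch, dim = 4, 8
    x = np.random.rand(batch, dim)   # in1: one row per sample
    y = np.random.rand(batch, dim)   # in2: same shape
    w = np.random.rand(dim)          # dotmul_projection's learned weight

    # dotmul_operator: out.row[i] += scale * (x.row[i] .* y.row[i])
    out_op = scale * (x * y)

    # dotmul_projection: every row multiplied element-wise by the
    # shared weight vector w (broadcast across the batch).
    out_proj = x * w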
@@ -539,7 +536,10 @@ class MixedLayerType(LayerOutput):
         if not self.finalized:
             assert isinstance(other, Projection) or isinstance(other, Operator)
             self.inputs.append(other)
-            self.parents.append(other.origin)
+            if isinstance(other, Projection):
+                self.parents.append(other.origin)
+            else:
+                self.parents.extend(other.origin)
             return self
         else:
             raise MixedLayerType.AddToSealedMixedLayerException()
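The append/extend split is needed because a Projection's origin is a single LayerOutput, while dotmul_operator now sets op.origin = [x, y], a list. A minimal illustration of why extend matters here:

    # Illustration of the parents bookkeeping; not PaddlePaddle code.
    parents = []

    proj_origin = "layer_a"             # Projection: one origin layer
    op_origin = ["layer_x", "layer_y"]  # Operator: list of origin layers

    parents.append(proj_origin)   # -> ["layer_a"]
    parents.extend(op_origin)     # -> ["layer_a", "layer_x", "layer_y"]

    # append(op_origin) would instead nest the list:
    # ["layer_a", ["layer_x", "layer_y"]] -- breaking any code that
    # expects parents to be a flat list of layers.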
@@ -565,7 +565,7 @@ class MixedLayerType(LayerOutput):
 @wrap_act_default(act=LinearActivation())
 @wrap_bias_attr_default(has_bias=False)
 @layer_support(ERROR_CLIPPING, DROPOUT)
-def mixed_layer(size, input=None, name=None, act=None, bias_attr=False,
+def mixed_layer(size=0, input=None, name=None, act=None, bias_attr=False,
                 layer_attr=None):
     """
     Mixed Layer. A mixed layer will add all inputs together, then activate.
...
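With size=0 as the new default, the argument can be omitted and MixedLayer infers the output size from the first projection or operator that reports one, then cross-checks the rest (per the config_parser change above). A hedged sketch; emb and its size are placeholders, not from the patch:

    # Hypothetical: emb's size (256) is illustrative.
    emb = data_layer(name="emb", size=256)

    # Before: size was required -> mixed_layer(size=256, input=[...])
    # After: size defaults to 0, meaning "infer from inputs":
    m = mixed_layer(input=[dotmul_projection(input=emb)])
    # DotMulProjection preserves input size, so m.size resolves to 256.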
@@ -38,8 +38,11 @@ print_layer(input=[out])
 outputs(classification_cost(out, data_layer(name="label", size=num_classes)))
 
+dotmul = mixed_layer(input=[dotmul_operator(x=x1, y=y1),
+                            dotmul_projection(input=y1)])
+
 # for ctc
-tmp = fc_layer(input=x1,
+tmp = fc_layer(input=[x1, dotmul],
                size=num_classes + 1,
                act=SoftmaxActivation())
 ctc = ctc_layer(input=tmp,
...