Commit 159dd833 authored by Haichao-Zhang, committed by emailweixu

split dotmul_projection and dotmul_operator (#87)

* split dotmul_projection and dotmul_operator
* bug fix in output size checking for mixed layer
Parent 90b9cba7
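In short: dotmul_projection no longer dispatches on the type of its input; the two-input case moves to the new dotmul_operator. A before/after sketch of the call sites (layer1 and layer2 stand for existing LayerOutput objects):

    # before this commit: one entry point, dispatching on the type of `input`
    proj = dotmul_projection(input=layer1)                      # projection case
    op = dotmul_projection(input=[layer1, layer2], scale=2.0)   # operator case

    # after this commit: two explicit entry points
    proj = dotmul_projection(input=layer1)
    op = dotmul_operator(x=layer1, y=layer2, scale=2.0)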
@@ -2430,7 +2430,6 @@ class MixedLayer(LayerBase):
         config_assert(inputs, 'inputs cannot be empty')
         super(MixedLayer, self).__init__(
             name, 'mixed', size, inputs=inputs, **xargs)
-
         operator_input_index = []
         for operator in self.operators:
             operator_conf = operator.operator_conf
@@ -2445,21 +2444,31 @@ class MixedLayer(LayerBase):
                input_layer = self.get_input_layer(input_index)
                operator_conf.input_sizes.append(input_layer.size)
                operator_input_index.append(input_index)
-            if self.config.size == 0:
+            if self.config.size == 0:
                size = operator.calc_output_size(operator_conf.input_sizes)
                if size != 0:
                    self.set_layer_size(size)
+            else:
+                size = operator.calc_output_size(operator_conf.input_sizes)
+                if size != 0:
+                    config_assert(size == self.config.size,
+                                  "different inputs have different size: %s vs. %s" %
+                                  (size, self.config.size))
        for input_index in xrange(len(self.inputs)):
            input_layer = self.get_input_layer(input_index)
            input = self.inputs[input_index]
+            if input_index not in operator_input_index:
+                config_assert(isinstance(input, Projection), "input should be projection or operation")
-            if self.config.size == 0 and isinstance(input, Projection):
+            if self.config.size == 0 and isinstance(input, Projection):
                size = input.calc_output_size(input_layer)
                if size != 0:
                    self.set_layer_size(size)
+            elif isinstance(input, Projection):
+                sz = input.calc_output_size(input_layer)
+                if sz != 0:
+                    config_assert(sz == self.config.size,
+                                  "different inputs have different size: %s vs. %s" %
+                                  (sz, self.config.size))
        config_assert(size != 0, "size is not set")
        for input_index in xrange(len(self.inputs)):
......
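The size handling in this hunk applies one rule to operators and projections alike: if the layer size is still unset (0), infer it from the first input that reports a nonzero output size; otherwise assert that every computed size agrees. A simplified, self-contained sketch of that rule (not the actual config_parser code; the helper name is hypothetical):

    def resolve_layer_size(layer_size, input_sizes):
        # layer_size == 0 means "not set yet"
        for sz in input_sizes:
            if sz == 0:
                continue  # this input cannot determine the size
            if layer_size == 0:
                layer_size = sz  # first known size wins
            else:
                # mirrors the config_assert added in this commit
                assert layer_size == sz, (
                    "different inputs have different size: %s vs. %s"
                    % (sz, layer_size))
        assert layer_size != 0, "size is not set"
        return layer_size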
@@ -28,7 +28,7 @@ except ImportError:
import copy

__all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
-           "identity_projection", "dotmul_projection",
+           "identity_projection", "dotmul_projection", "dotmul_operator",
           "table_projection", "mixed_layer", "data_layer",
           "embedding_layer", "fc_layer", "grumemory",
           "pooling_layer", "lstmemory", "last_seq", "first_seq",
@@ -389,7 +389,7 @@ def identity_projection(input, offset=None):
@wrap_param_attr_default()
def dotmul_projection(input, param_attr=None, scale=1):
    """
-    1. DotMulProjection if input is a layer.
+    DotMulProjection with a layer as input.
    It performs element-wise multiplication with weight.

    .. math::
@@ -403,48 +403,45 @@ def dotmul_projection(input, param_attr=None, scale=1):
       proj = dotmul_projection(input=layer)

-    2. DotMulOperator if input is a list or tuple.
-       It takes two inputs, performs element-wise multiplication:
-
-    .. math::
-       out.row[i] += scale * (in1.row[i] .* in2.row[i])
-
-    where :math:`.*` means element-wise multiplication, and
-    scale is a config scalar, its default value is one.
-
-    The example usage is:
-
-    .. code-block:: python
-
-       op = dotmul_projection(input=[layer1, layer2],
-                              scale=2.0)
-
    :param input: Input layer.
-    :type input: LayerOutput|list|tuple
+    :type input: LayerOutput
    :param param_attr: Parameter config, None if use default.
    :type param_attr: ParameterAttribute
    :param scale: config scalar, default value is one.
    :type scale: float
-    :return: A DotMulProjection or DotMulOperator Object.
-    :rtype: DotMulProjection or DotMulOperator
+    :return: A DotMulProjection Object.
+    :rtype: DotMulProjection
    """
-    if isinstance(input, LayerOutput):
-        proj = DotMulProjection(input_layer_name=input.name,
+    proj = DotMulProjection(input_layer_name=input.name,
                            size=input.size,
                            **param_attr.attr)
-        proj.origin = input
-        proj.origin.projection = "dot_mul"
-        return proj
-    else:
-        assert isinstance(input, list) or isinstance(input, tuple)
-        assert len(input) == 2
-        assert param_attr is None
-        op = DotMulOperator(input_layer_name=[x.name for x in input],
-                            scale=scale)
-        op.origin = input
-        op.origin.operator = "dot_mul"
-        return op
+    proj.origin = input
+    return proj

+def dotmul_operator(x, y, scale=1):
+    """
+    DotMulOperator takes two inputs and performs element-wise multiplication:
+
+    .. math::
+       out.row[i] += scale * (x.row[i] .* y.row[i])
+
+    where :math:`.*` means element-wise multiplication, and
+    scale is a config scalar, its default value is one.
+
+    The example usage is:
+
+    .. code-block:: python
+
+       op = dotmul_operator(x, y, scale=1)
+
+    :param x: The first input layer.
+    :type x: LayerOutput
+    :param y: The second input layer.
+    :type y: LayerOutput
+    :param scale: config scalar, default value is one.
+    :type scale: float
+    :return: A DotMulOperator Object.
+    :rtype: DotMulOperator
+    """
+    assert isinstance(x, LayerOutput)
+    assert isinstance(y, LayerOutput)
+    op = DotMulOperator(input_layer_names=[x.name, y.name],
+                        scale=scale)
+    op.origin = [x, y]
+    return op
@wrap_bias_attr_default(['padding_attr'])
def context_projection(input, context_len, context_start=None,
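Taken together, each case now has an explicit entry point. A short usage sketch, assuming a and b are existing LayerOutput objects of the same size:

    # element-wise scaling of one layer by a learned weight vector
    proj = dotmul_projection(input=a)

    # element-wise product of two layers, with an optional scalar factor:
    # out.row[i] += scale * (a.row[i] .* b.row[i])
    op = dotmul_operator(x=a, y=b, scale=2.0)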
@@ -539,7 +536,10 @@ class MixedLayerType(LayerOutput):
        if not self.finalized:
            assert isinstance(other, Projection) or isinstance(other, Operator)
            self.inputs.append(other)
-            self.parents.append(other.origin)
+            if isinstance(other, Projection):
+                self.parents.append(other.origin)
+            else:
+                self.parents.extend(other.origin)
            return self
        else:
            raise MixedLayerType.AddToSealedMixedLayerException()
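The branch is needed because the two input kinds record provenance differently: a Projection reads one layer, so its origin is a single LayerOutput, while dotmul_operator above sets op.origin = [x, y], a list that must be extend()ed. A sketch of the effect, with mix standing for an unfinalized MixedLayerType and a, b for layers:

    mix += dotmul_projection(input=a)   # parents.append(a)       -> one parent
    mix += dotmul_operator(x=a, y=b)    # parents.extend([a, b])  -> two parents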
@@ -565,7 +565,7 @@ class MixedLayerType(LayerOutput):
@wrap_act_default(act=LinearActivation())
@wrap_bias_attr_default(has_bias=False)
@layer_support(ERROR_CLIPPING, DROPOUT)
-def mixed_layer(size, input=None, name=None, act=None, bias_attr=False,
+def mixed_layer(size=0, input=None, name=None, act=None, bias_attr=False,
                layer_attr=None):
    """
    Mixed Layer. A mixed layer will add all inputs together, then activate.
......
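With size now defaulting to 0, a mixed_layer may omit its size and have it inferred from any input whose output size is known, subject to the consistency checks added in config_parser.py above. A sketch mirroring the test config below:

    # size omitted (defaults to 0): inferred from the operator's input sizes
    dotmul = mixed_layer(input=[dotmul_operator(x=x1, y=y1),
                                dotmul_projection(input=y1)])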
@@ -38,8 +38,11 @@ print_layer(input=[out])
outputs(classification_cost(out, data_layer(name="label", size=num_classes)))

+dotmul = mixed_layer(input=[dotmul_operator(x=x1, y=y1),
+                            dotmul_projection(input=y1)])
+
# for ctc
-tmp = fc_layer(input=x1,
+tmp = fc_layer(input=[x1, dotmul],
               size=num_classes + 1,
               act=SoftmaxActivation())
ctc = ctc_layer(input=tmp,
......