Commit a2766842 authored by H Haonan, committed by GitHub

Merge pull request #429 from emailweixu/math_mul

'*' operator overload for LayerOutput
@@ -254,6 +254,12 @@ expand_layer
    :members: expand_layer
    :noindex:

repeat_layer
------------
..  automodule:: paddle.trainer_config_helpers.layers
    :members: repeat_layer
    :noindex:

Math Layers
===========
...
@@ -3015,7 +3015,7 @@ def Layer(
    layer_func = layers.get(type)
    config_assert(layer_func,
                  "layer type '%s' not supported." % type)
    return layer_func(name, **xargs)

@config_func
def ParameterHook(
...
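Context for this one-line change: Layer() previously registered the layer config only as a side effect and returned nothing. Returning the created config is what lets the layer wrappers below read the computed output size. A minimal sketch of the resulting call pattern (the argument values here are placeholders, not part of the patch):

    l = Layer(name=name, type=LayerType.POOL_LAYER, inputs=ipts)
    return LayerOutput(name, LayerType.POOL_LAYER, parents=[input],
                       size=l.config.size)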
@@ -20,3 +20,6 @@ from layers import *
from networks import *
from optimizers import *
from attrs import *

# This will enable operator overload for LayerOutput
import math
@@ -31,6 +31,7 @@ import copy
__all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
           "identity_projection", "dotmul_projection", "dotmul_operator",
           "repeat_layer",
           "table_projection", "mixed_layer", "data_layer",
           "embedding_layer", "fc_layer", "grumemory",
           "pooling_layer", "lstmemory", "last_seq", "first_seq",
@@ -99,6 +100,7 @@ class LayerType(object):
    SCALING_LAYER = 'scaling'
    TRANS_LAYER = 'trans'
    OUT_PROD_LAYER = 'out_prod'
    FEATURE_MAP_EXPAND_LAYER = 'featmap_expand'

    MEMORY = 'memory'
    MAXID_LAYER = 'maxid'
@@ -181,6 +183,7 @@ class LayerOutput(object):
                 reverse=None):
        assert isinstance(name, basestring)
        assert isinstance(layer_type, basestring)
        assert size is not None
        assert LayerType.is_layer_type(layer_type)
        self.name = name
        self.layer_type = layer_type
@@ -1209,6 +1212,48 @@ def expand_layer(input, expand_as,
                       parents=[input, expand_as])

@wrap_name_default()
@layer_support()
def repeat_layer(input, num_repeats,
                 name=None,
                 layer_attr=None):
    """
    A layer that repeats the input num_repeats times. This is equivalent
    to applying concat_layer() to num_repeats copies of the same input.

    ..  math::
        y = [x, x, \cdots, x]

    The example usage is:

    ..  code-block:: python

        expand = repeat_layer(layer, 4)

    :param input: Input layer
    :type input: LayerOutput
    :param num_repeats: number of times to repeat the input
    :type num_repeats: int
    :param name: Layer name.
    :type name: basestring
    :param layer_attr: extra layer attributes.
    :type layer_attr: ExtraLayerAttribute.
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
    l = Layer(
        inputs=[input.name],
        name=name,
        num_filters=num_repeats,
        type=LayerType.FEATURE_MAP_EXPAND_LAYER,
        **ExtraAttr.to_kwargs(layer_attr)
    )
    return LayerOutput(name=name,
                       size=l.config.size,
                       layer_type=LayerType.FEATURE_MAP_EXPAND_LAYER,
                       parents=[input])

@wrap_name_default()
@layer_support()
def interpolation_layer(input, weight, name=None, layer_attr=None):
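For orientation, a minimal usage sketch of the new layer (the layer names and sizes are hypothetical): the output concatenates num_repeats copies of the input, so its size is input.size * num_repeats.

    x = data_layer(name='x', size=3)
    y = repeat_layer(x, 4)    # y represents [x, x, x, x], so y.size == 12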
@@ -1296,7 +1341,7 @@ def bilinear_interp_layer(input,
    assert out_size_x > 0 and out_size_y > 0
    assert input.num_filters is not None
    num_channels = input.num_filters
    l = Layer(name=name,
              inputs=Input(input.name,
                           bilinear_interp=BilinearInterp(out_size_x=out_size_x,
                                                          out_size_y=out_size_y,
@@ -1304,7 +1349,7 @@ def bilinear_interp_layer(input,
              type=LayerType.BILINEAR_INTERP_LAYER,
              **ExtraLayerAttribute.to_kwargs(layer_attr))
    return LayerOutput(name, LayerType.BILINEAR_INTERP_LAYER, parents=[input],
                       num_filters=num_channels, size=l.config.size)

@wrap_name_default()
@layer_support()
@@ -1482,7 +1527,7 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
        inputs=[a.name, b.name],
        **ExtraLayerAttribute.to_kwargs(layer_attr)
    )
    return LayerOutput(name, LayerType.COSINE_SIM, parents=[a, b], size=size)

@wrap_name_default()
@@ -1545,7 +1590,7 @@ def hsigmoid(input, label, num_classes, name=None, bias_attr=None,
        ipts_for_layer.append(label.name)
        parents.append(label)

    l = Layer(
        name=name,
        type=LayerType.HSIGMOID,
        num_classes=num_classes,
@@ -1553,7 +1598,8 @@ def hsigmoid(input, label, num_classes, name=None, bias_attr=None,
        inputs=ipts_for_layer,
        **ExtraLayerAttribute.to_kwargs(layer_attr)
    )
    return LayerOutput(name, LayerType.HSIGMOID, parents=parents,
                       size=l.config.size)

@wrap_name_default("conv")
@@ -1671,7 +1717,7 @@ def img_conv_layer(input, filter_size, num_filters,
    lt = LayerType.CONVTRANS_LAYER if trans else LayerType.CONV_LAYER

    l = Layer(
        name=name,
        inputs=Input(input.name, conv=Conv(
            filter_size=filter_size, padding=padding, stride=stride,
@@ -1687,7 +1733,8 @@ def img_conv_layer(input, filter_size, num_filters,
        **ExtraLayerAttribute.to_kwargs(layer_attr)
    )
    return LayerOutput(name, lt, parents=[input],
                       activation=act, num_filters=num_filters,
                       size=l.config.size)

@wrap_name_default("pool")
@@ -1750,7 +1797,7 @@ def img_pool_layer(input, pool_size, name=None,
    stride_y = stride if stride_y is None else stride_y
    padding_y = padding if padding_y is None else padding_y

    l = Layer(
        name=name,
        type=LayerType.POOL_LAYER,
        inputs=[Input(input.name,
@@ -1769,7 +1816,7 @@ def img_pool_layer(input, pool_size, name=None,
        **ExtraLayerAttribute.to_kwargs(layer_attr)
    )
    return LayerOutput(name, LayerType.POOL_LAYER, parents=[input],
                       num_filters=num_channels, size=l.config.size)

def __img_norm_layer__(name, input, size, norm_type, scale, power,
@@ -1778,7 +1825,7 @@ def __img_norm_layer__(name, input, size, norm_type, scale, power,
    assert input.num_filters is not None
    num_channels = input.num_filters

    l = Layer(
        name=name, type=LayerType.NORM_LAYER, inputs=Input(
            input.name, norm=Norm(norm_type=norm_type,
                                  channels=num_channels, size=size,
@@ -1788,7 +1835,8 @@ def __img_norm_layer__(name, input, size, norm_type, scale, power,
        **ExtraLayerAttribute.to_kwargs(layer_attr)
    )
    return LayerOutput(name, layer_type=LayerType.NORM_LAYER, parents=[input],
                       num_filters=num_channels, img_norm_type=norm_type,
                       size=l.config.size)

@wrap_name_default("crmnorm")
@@ -1913,7 +1961,7 @@ def batch_norm_layer(input, act=None, name=None, num_channels=None,
        num_channels = input.size
    assert (batch_norm_type is None) or (batch_norm_type == "batch_norm") or \
           (batch_norm_type == "cudnn_batch_norm")
    l = Layer(
        name=name,
        inputs=Input(input.name,
                     image=Image(channels=num_channels),
@@ -1929,7 +1977,8 @@ def batch_norm_layer(input, act=None, name=None, num_channels=None,
    return LayerOutput(name=name, layer_type=LayerType.BATCH_NORM_LAYER,
                       parents=[input], activation=act,
                       num_filters=num_channels,
                       size=l.config.size)

@wrap_name_default()
@@ -2034,7 +2083,7 @@ def addto_layer(input, act=None, name=None, bias_attr=None,
        if each_input.num_filters is not None:
            num_filters = each_input.num_filters

    l = Layer(
        name=name, type=LayerType.ADDTO_LAYER, inputs=ipts_for_layer,
        bias=ParamAttr.to_bias(bias_attr),
        active_type=act.name,
@@ -2042,7 +2091,8 @@ def addto_layer(input, act=None, name=None, bias_attr=None,
    )
    return LayerOutput(name, LayerType.ADDTO_LAYER, parents=input,
                       activation=act, num_filters=num_filters,
                       size=l.config.size)

@wrap_act_default(act=IdentityActivation())
@@ -2651,13 +2701,14 @@ def maxid_layer(input, name=None, layer_attr=None):
    """
    assert isinstance(input, LayerOutput)
    l = Layer(name=name,
              type='maxid',
              inputs=[input.name],
              **ExtraLayerAttribute.to_kwargs(layer_attr))
    return LayerOutput(name=name,
                       layer_type=LayerType.MAXID_LAYER,
                       parents=[input],
                       size=l.config.size)

@wrap_name_default()
@@ -2686,13 +2737,14 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None):
    assert isinstance(input1, LayerOutput)
    assert isinstance(input2, LayerOutput)
    l = Layer(name=name,
              type=LayerType.OUT_PROD_LAYER,
              inputs=[input1.name, input2.name],
              **ExtraLayerAttribute.to_kwargs(layer_attr))
    return LayerOutput(name=name,
                       layer_type=LayerType.OUT_PROD_LAYER,
                       parents=[input1, input2],
                       size=l.config.size)

@wrap_name_default()
@@ -2721,13 +2773,14 @@ def eos_layer(input, eos_id, name=None, layer_attr=None):
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
    l = Layer(name=name,
              type=LayerType.EOSID_LAYER,
              eos_id=eos_id,
              inputs=[input.name],
              **ExtraLayerAttribute.to_kwargs(layer_attr))
    return LayerOutput(name=name, layer_type=LayerType.EOSID_LAYER,
                       parents=[input],
                       size=l.config.size)

@wrap_name_default()
@@ -2892,7 +2945,7 @@ def regression_cost(input, label, weight=None, name=None,
    Layer(inputs=ipts, type="square_error", name=name,
          **ExtraLayerAttribute.to_kwargs(layer_attr))
    return LayerOutput(name, LayerType.COST, parents=parents, size=1)

@wrap_name_default("cost")
@@ -2944,7 +2997,7 @@ def classification_cost(input, label, weight=None, name=None,
    for each_evaluator in evaluator:
        __add_evaluator__(each_evaluator)

    return LayerOutput(name, LayerType.COST, parents=parents, size=1)

def conv_operator(img, filter, filter_size, num_filters,
@@ -3326,13 +3379,14 @@ def sampling_id_layer(input, name=None, layer_attr=None):
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
    l = Layer(
        name=name,
        type=LayerType.SAMPLING_ID_LAYER,
        inputs=[Input(input.name)],
        **ExtraLayerAttribute.to_kwargs(layer_attr)
    )
    return LayerOutput(name, LayerType.SAMPLING_ID_LAYER, input,
                       size=l.config.size)

@wrap_name_default()
@@ -3373,7 +3427,8 @@ def slope_intercept_layer(input, name=None, slope=1.0, intercept=0.0,
        inputs=[Input(input.name)],
        **ExtraLayerAttribute.to_kwargs(layer_attr)
    )
    return LayerOutput(name, LayerType.SLOPE_INTERCEPT_LAYER, input,
                       size=input.size)

@wrap_name_default()
@@ -3512,7 +3567,7 @@ def block_expand_layer(input,
    if num_channels is None:
        assert input.num_filters is not None
        num_channels = input.num_filters
    l = Layer(name=name,
              inputs=Input(input.name,
                           block_expand=BlockExpand(channels=num_channels,
                                                    block_x=block_x,
@@ -3525,7 +3580,8 @@ def block_expand_layer(input,
              **ExtraLayerAttribute.to_kwargs(layer_attr)
              )
    return LayerOutput(name, LayerType.BLOCK_EXPAND, parents=[input],
                       size=l.config.size)

@wrap_name_default()
@@ -3586,13 +3642,14 @@ def maxout_layer(input,
    assert input.num_filters is not None
    num_channels = input.num_filters
    assert num_channels % groups == 0
    l = Layer(name=name,
              inputs=Input(input.name,
                           maxout=MaxOut(channels=num_channels,
                                         groups=groups)),
              type=LayerType.MAXOUT,
              **ExtraLayerAttribute.to_kwargs(layer_attr))
    return LayerOutput(name, LayerType.MAXOUT, parents=[input],
                       size=l.config.size)

@wrap_name_default()
@@ -3718,7 +3775,10 @@ def crf_layer(input, label, size=None, weight=None, param_attr=None, name=None,
    parents = [input, label]
    if weight is not None:
        parents.append(weight)
    # The size of a LayerOutput is the dimension of the layer's output.
    # That differs from the `size` argument of the CRF layer, which is the
    # number of classes.
    return LayerOutput(name, LayerType.CRF_LAYER, parents, size=1)

@wrap_name_default()
@@ -3766,7 +3826,10 @@ def crf_decoding_layer(input, size, label=None, param_attr=None, name=None,
    parents = [input]
    if label is not None:
        parents.append(label)
    # The size of a LayerOutput is the dimension of the layer's output.
    # That differs from the `size` argument of the CRF layer, which is the
    # number of classes.
    return LayerOutput(name, LayerType.CRF_DECODING_LAYER, parents, size=1)

@wrap_bias_attr_default(has_bias=True)
@wrap_name_default()
@@ -3834,7 +3897,7 @@ def nce_layer(input, label, num_classes, weight=None,
        ipts_for_layer.append(weight.name)
        parents.append(weight)

    l = Layer(
        name=name,
        type=LayerType.NCE_LAYER,
        num_classes=num_classes,
@@ -3844,7 +3907,8 @@ def nce_layer(input, label, num_classes, weight=None,
        bias=ParamAttr.to_bias(bias_attr),
        **ExtraLayerAttribute.to_kwargs(layer_attr)
    )
    return LayerOutput(name, LayerType.NCE_LAYER, parents=parents,
                       size=l.config.size)

"""
following are cost Layers.
@@ -3919,7 +3983,7 @@ def rank_cost(left, right, label, weight=None, name=None, coeff=1.0, layer_attr=None):
        **ExtraLayerAttribute.to_kwargs(layer_attr)
    )

    return LayerOutput(name, LayerType.RANK_COST, parents=parents, size=1)

@wrap_name_default()
@@ -3971,7 +4035,8 @@ def lambda_cost(input, score, name, NDCG_num=5, max_sort_size=-1, layer_attr=None):
        **ExtraLayerAttribute.to_kwargs(layer_attr)
    )

    return LayerOutput(name, LayerType.LAMBDA_COST, parents=[input, score],
                       size=1)

@wrap_name_default()
@@ -4006,7 +4071,8 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
        coeff=coeff,
        **ExtraLayerAttribute.to_kwargs(layer_attr)
    )
    return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=[input, label],
                       size=1)

@wrap_name_default()
@@ -4048,7 +4114,7 @@ def cross_entropy_with_selfnorm(input, label, name=None, coeff=1.0,
    return LayerOutput(name,
                       LayerType.CROSS_ENTROPY_WITH_SELFNORM,
                       parents=[input, label], size=1)

@wrap_name_default()
@@ -4083,7 +4149,7 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
        coeff=coeff,
        **ExtraLayerAttribute.to_kwargs(layer_attr)
    )
    return LayerOutput(name, LayerType.HUBER, parents=[input, label], size=1)

@wrap_name_default()
@@ -4126,4 +4192,4 @@ def multi_binary_label_cross_entropy(input, label, name=None, coeff=1.0,
        **ExtraLayerAttribute.to_kwargs(layer_attr)
    )
    return LayerOutput(name, LayerType.MULTI_BIN_LABEL_CROSS_ENTROPY,
                       parents=[input, label], size=1)
@@ -13,10 +13,11 @@
# limitations under the License.

from .layers import LayerOutput, mixed_layer, identity_projection, \
    slope_intercept_layer, scaling_layer, repeat_layer
from .attrs import is_compatible_with
from .default_decorators import *
import activations as act
from paddle.trainer.config_parser import logger

__all__ = []
@@ -40,7 +41,21 @@ register_unary_math_op('square', act.SquareActivation())

def add(layeroutput, other):
    if is_compatible_with(other, float):
        return slope_intercept_layer(input=layeroutput, intercept=other)
    if not isinstance(other, LayerOutput):
        logger.fatal("LayerOutput can only be added to"
                     " another LayerOutput or a number")
    if layeroutput.size == other.size:
        return mixed_layer(input=[identity_projection(input=layeroutput),
                                  identity_projection(input=other)])
    if other.size != 1 and layeroutput.size != 1:
        logger.fatal("Two LayerOutputs can be added only if they have equal"
                     " size or one of their sizes is 1. sizes are %s and %s" %
                     (layeroutput.size, other.size))
    elif layeroutput.size == 1:
        tmp = layeroutput
        layeroutput = other
        other = tmp
    other = repeat_layer(other, layeroutput.size)
    return mixed_layer(input=[identity_projection(input=layeroutput),
                              identity_projection(input=other)])
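A short sketch of the broadcasting rule this gives '+' (names and sizes are hypothetical): when exactly one operand has size 1, it is repeated to the other operand's size via repeat_layer, and the two are then summed through a mixed_layer of identity projections.

    a = data_layer(name='a', size=100)
    b = data_layer(name='b', size=1)
    c = a + b      # b is repeated 100 times, then added elementwise to a
    d = a + 2.0    # a plain number becomes the intercept of a slope_intercept_layer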
@@ -50,10 +65,11 @@ LayerOutput.__add__ = add

def sub(layeroutput, other):
    if is_compatible_with(other, float):
        return slope_intercept_layer(input=layeroutput, intercept=other)
    if not isinstance(other, LayerOutput):
        logger.fatal("LayerOutput can only be subtracted by"
                     " another LayerOutput or a number")
    neg = slope_intercept_layer(input=other, slope=-1.0)
    return add(layeroutput, neg)

LayerOutput.__sub__ = sub
@@ -62,3 +78,20 @@ def rsub(layeroutput, other):
    return add(neg, other)

LayerOutput.__rsub__ = rsub
def mul(layeroutput, other):
    if is_compatible_with(other, float):
        return slope_intercept_layer(input=layeroutput, slope=other)
    if not isinstance(other, LayerOutput):
        logger.fatal("LayerOutput can only be multiplied by"
                     " another LayerOutput or a number")
    elif layeroutput.size == 1:
        return scaling_layer(input=other, weight=layeroutput)
    elif other.size == 1:
        return scaling_layer(input=layeroutput, weight=other)
    else:
        logger.fatal("At least one of the operands of '*' must be a number"
                     " or a LayerOutput with size=1")

LayerOutput.__mul__ = mul
LayerOutput.__rmul__ = mul
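A sketch of the resulting '*' semantics (names and sizes are hypothetical): multiplying by a number wraps the input in a slope_intercept_layer, while multiplying two LayerOutputs requires one operand of size 1 and routes through scaling_layer, which scales each sample of the other operand by that scalar.

    v = data_layer(name='v', size=100)
    s = data_layer(name='s', size=1)
    w = 2 * v    # slope_intercept_layer with slope=2
    u = s * v    # scaling_layer: per-sample scalar s times vector v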
@@ -19,6 +19,12 @@ y = x + y
y = y - x
y = y - 2
y = 2 - y
y = 2 * y
y = y * 3
z = data_layer(name='data_2', size=1)
y = y * z
y = z * y
y = y + z
y = z + y

outputs(y)
@@ -209,8 +209,129 @@ layers {
  slope: 1.0
  intercept: 2
}
layers {
  name: "__slope_intercept_layer_6__"
  type: "slope_intercept"
  size: 100
  active_type: ""
  inputs {
    input_layer_name: "__slope_intercept_layer_5__"
  }
  slope: 2
  intercept: 0.0
}
layers {
  name: "__slope_intercept_layer_7__"
  type: "slope_intercept"
  size: 100
  active_type: ""
  inputs {
    input_layer_name: "__slope_intercept_layer_6__"
  }
  slope: 3
  intercept: 0.0
}
layers {
  name: "data_2"
  type: "data"
  size: 1
  active_type: ""
}
layers {
  name: "__scaling_layer_0__"
  type: "scaling"
  size: 100
  active_type: ""
  inputs {
    input_layer_name: "data_2"
  }
  inputs {
    input_layer_name: "__slope_intercept_layer_7__"
  }
}
layers {
  name: "__scaling_layer_1__"
  type: "scaling"
  size: 100
  active_type: ""
  inputs {
    input_layer_name: "data_2"
  }
  inputs {
    input_layer_name: "__scaling_layer_0__"
  }
}
layers {
  name: "__repeat_layer_0__"
  type: "featmap_expand"
  size: 100
  active_type: ""
  inputs {
    input_layer_name: "data_2"
  }
  num_filters: 100
}
layers {
  name: "__mixed_2__"
  type: "mixed"
  size: 100
  active_type: ""
  inputs {
    input_layer_name: "__scaling_layer_1__"
    proj_conf {
      type: "identity"
      name: "___mixed_2__.w0"
      input_size: 100
      output_size: 100
    }
  }
  inputs {
    input_layer_name: "__repeat_layer_0__"
    proj_conf {
      type: "identity"
      name: "___mixed_2__.w1"
      input_size: 100
      output_size: 100
    }
  }
}
layers {
  name: "__repeat_layer_1__"
  type: "featmap_expand"
  size: 100
  active_type: ""
  inputs {
    input_layer_name: "data_2"
  }
  num_filters: 100
}
layers {
  name: "__mixed_3__"
  type: "mixed"
  size: 100
  active_type: ""
  inputs {
    input_layer_name: "__mixed_2__"
    proj_conf {
      type: "identity"
      name: "___mixed_3__.w0"
      input_size: 100
      output_size: 100
    }
  }
  inputs {
    input_layer_name: "__repeat_layer_1__"
    proj_conf {
      type: "identity"
      name: "___mixed_3__.w1"
      input_size: 100
      output_size: 100
    }
  }
}
input_layer_names: "data_2"
input_layer_names: "data"
output_layer_names: "__mixed_3__"
sub_models {
  name: "root"
  layer_names: "data"
@@ -228,8 +349,18 @@ sub_models {
  layer_names: "__slope_intercept_layer_3__"
  layer_names: "__slope_intercept_layer_4__"
  layer_names: "__slope_intercept_layer_5__"
  layer_names: "__slope_intercept_layer_6__"
  layer_names: "__slope_intercept_layer_7__"
  layer_names: "data_2"
  layer_names: "__scaling_layer_0__"
  layer_names: "__scaling_layer_1__"
  layer_names: "__repeat_layer_0__"
  layer_names: "__mixed_2__"
  layer_names: "__repeat_layer_1__"
  layer_names: "__mixed_3__"
  input_layer_names: "data_2"
  input_layer_names: "data"
  output_layer_names: "__mixed_3__"
  is_recurrent_layer_group: false
}