Commit ef038743 authored by Tao Luo

remove legacy python code

Parent 81da8549
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cPickle
import logging
import collections
import functools
import itertools
logging.basicConfig(format="[%(levelname)s %(asctime)s %(filename)s:%(lineno)s]"
" %(message)s")
class SequenceType(object):
NO_SEQUENCE = 0
SEQUENCE = 1
SUB_SEQUENCE = 2
@classmethod
def tostring(cls, value):
for k in cls.__dict__:
if not k.startswith('__'):
if getattr(cls, k) == value:
return cls.__name__ + '.' + k
return 'INVALID(' + str(value) + ')'
# TODO(yuyang18): Add string data type here.
class DataType(object):
Dense = 0
SparseNonValue = 1
SparseValue = 2
Index = 3
@classmethod
def tostring(cls, value):
for k in cls.__dict__:
if not k.startswith('__'):
if getattr(cls, k) == value:
return cls.__name__ + '.' + k
return 'INVALID(' + str(value) + ')'
class CacheType(object):
NO_CACHE = 0 # No cache at all
# On the first pass, read data from Python and store it in memory. Read from
# memory during subsequent passes.
CACHE_PASS_IN_MEM = 1
class InputType(object):
"""
InputType is the base class for paddle input types.
.. note::
this is a base class and should never be used directly by the user.
:param dim: dimension of the input. If the input is an integer, it means its
value range. Otherwise, it means the size of the layer.
:type dim: int
:param seq_type: sequence type of input. 0 means it is not a sequence. 1
means it is a variable length sequence. 2 means it is a
nested sequence.
:type seq_type: int
:param type: data type of input.
:type type: int
"""
__slots__ = ['dim', 'seq_type', 'type']
def __init__(self, dim, seq_type, tp):
self.dim = dim
self.seq_type = seq_type
self.type = tp
def __repr__(self):
"""
Return a human readable representation like 'InputType(dim=25921,
seq_type=SequenceType.NO_SEQUENCE, type=DataType.Dense)'
"""
repr_str = type(self).__name__
repr_str += '('
serialize_func_map = {
'dim': repr,
'seq_type': SequenceType.tostring,
'type': DataType.tostring
}
for idx, k in enumerate(self.__slots__):
if idx != 0:
repr_str += ', '
repr_str += (
k + '=' + serialize_func_map.get(k, repr)(getattr(self, k)))
repr_str += ')'
return repr_str
def dense_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
"""
Dense Array. It means the input feature is a dense array of floats.
For example, if the input is an image with 28*28 pixels, the input of
the Paddle neural network could be a dense vector with dimension 784 or a
numpy array with shape (28, 28).
For the 2-D convolution operation, each sample in a mini-batch must currently
have the same size in PaddlePaddle, but the feature dimension may vary across
mini-batches. In the variable-dimension case, the param dim is not used; the
data reader must yield a numpy array, and the data feeder will set the data
shape correctly.
:param dim: dimension of this vector.
:type dim: int
:param seq_type: sequence type of input.
:type seq_type: int
:return: An input type object.
:rtype: InputType
"""
return InputType(dim, seq_type, DataType.Dense)
def sparse_non_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
"""
Sparse binary vector. It means the input feature is a sparse vector in which
every element is either zero or one.
:param dim: dimension of this vector.
:type dim: int
:param seq_type: sequence type of this input.
:type seq_type: int
:return: An input type object.
:rtype: InputType
"""
return InputType(dim, seq_type, DataType.SparseNonValue)
def sparse_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
"""
Sparse vector. It means the input feature is a sparse vector. Most of the
elements in this vector are zero; the others can be any float value.
:param dim: dimension of this vector.
:type dim: int
:param seq_type: sequence type of this input.
:type seq_type: int
:return: An input type object.
:rtype: InputType
"""
return InputType(dim, seq_type, DataType.SparseValue)
def index_slot(value_range, seq_type=SequenceType.NO_SEQUENCE):
"""
Data type of an integer.
:param seq_type: sequence type of this input.
:type seq_type: int
:param value_range: range of this integer.
:type value_range: int
:return: An input type object
:rtype: InputType
"""
return InputType(value_range, seq_type, DataType.Index)
dense_vector = dense_slot
sparse_binary_vector = sparse_non_value_slot
sparse_float_vector = sparse_value_slot
integer_value = index_slot
# dense_array can be used for variable-length input feature.
# Each feature is not a vector, but a multi-dimensional array.
dense_array = dense_slot
def dense_vector_sequence(dim):
"""
Data type of a sequence of dense vectors.
:param dim: dimension of dense vector.
:type dim: int
:return: An input type object
:rtype: InputType
"""
return dense_vector(dim, seq_type=SequenceType.SEQUENCE)
def dense_vector_sub_sequence(dim):
return dense_vector(dim, seq_type=SequenceType.SUB_SEQUENCE)
def sparse_binary_vector_sequence(dim):
"""
Data type of a sequence of sparse binary vectors, in which every element is
either zero or one.
:param dim: dimension of sparse vector.
:type dim: int
:return: An input type object
:rtype: InputType
"""
return sparse_binary_vector(dim, seq_type=SequenceType.SEQUENCE)
def sparse_binary_vector_sub_sequence(dim):
return sparse_binary_vector(dim, seq_type=SequenceType.SUB_SEQUENCE)
def sparse_float_vector_sequence(dim):
"""
Data type of a sequence of sparse vectors, in which most elements are zero
and the others can be any float value.
:param dim: dimension of sparse vector.
:type dim: int
:return: An input type object
:rtype: InputType
"""
return sparse_float_vector(dim, seq_type=SequenceType.SEQUENCE)
def sparse_float_vector_sub_sequence(dim):
return sparse_float_vector(dim, seq_type=SequenceType.SUB_SEQUENCE)
def integer_value_sequence(value_range):
"""
Data type of a sequence of integers.
:param value_range: range of each element.
:type value_range: int
"""
return integer_value(value_range, seq_type=SequenceType.SEQUENCE)
def integer_value_sub_sequence(dim):
return integer_value(dim, seq_type=SequenceType.SUB_SEQUENCE)
integer_sequence = integer_value_sequence
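# A minimal usage sketch of the helpers above (the dimensions and the
# vocabulary size here are illustrative only): declare input types for an
# image-classification provider and for a word-id sequence provider.
def _input_type_examples():
    image_and_label = [dense_vector(784), integer_value(10)]
    word_ids = integer_value_sequence(10000)
    # repr() of an InputType shows dim, seq_type and type, e.g.
    # InputType(dim=784, seq_type=SequenceType.NO_SEQUENCE, type=DataType.Dense)
    return image_and_label, word_ids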
class SingleSlotWrapper(object):
def __init__(self, generator):
self.generator = generator
def __call__(self, obj, filename):
for item in self.generator(obj, filename):
if isinstance(item, dict):
yield item
else:
yield [item]
class InputOrderWrapper(object):
def __init__(self, generator, input_order):
self.generator = generator
self.input_order = input_order
def __call__(self, obj, filename):
for item in self.generator(obj, filename):
if isinstance(item, dict):
yield [
item.get(input_name, None)
for input_name in self.input_order
]
else:
yield item
class CheckWrapper(object):
def __init__(self, generator, input_types, check_fail_continue, logger):
self.generator = generator
self.input_types = input_types
self.check_fail_continue = check_fail_continue
self.logger = logger
def __call__(self, obj, filename):
for items in self.generator(obj, filename):
try:
assert len(items) == len(self.input_types)
assert len(filter(lambda x: x is None, items)) == 0
for item, input_type in itertools.izip(items, self.input_types):
callback = functools.partial(CheckWrapper.loop_callback,
input_type)
for _ in xrange(input_type.seq_type):
callback = functools.partial(CheckWrapper.loop_check,
callback)
callback(item)
yield items
except AssertionError as e:
self.logger.warning(
"Item (%s) does not fit the input type, with error %s" %
(repr(item), repr(e)))
if self.check_fail_continue:
continue
else:
raise
@staticmethod
def loop_callback(input_type, each):
assert isinstance(input_type, InputType)
if input_type.type == DataType.Dense:
assert isinstance(each, collections.Sequence)
for d in each:
assert isinstance(d, float)
assert len(each) == input_type.dim
elif input_type.type == DataType.Index:
assert isinstance(each, int)
assert each < input_type.dim
elif input_type.type == DataType.SparseNonValue \
or input_type.type == DataType.SparseValue:
assert isinstance(each, collections.Sequence)
sparse_id = set()
for k in each:
if input_type.type == DataType.SparseValue:
k, v = k
assert isinstance(v, float)
assert isinstance(k, int)
assert k < input_type.dim
sparse_id.add(k)
assert len(sparse_id) == len(each)
else:
raise RuntimeError("Unsupported input type")
@staticmethod
def loop_check(callback, item):
for each in item:
callback(each)
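# Items that satisfy CheckWrapper.loop_callback for each DataType, assuming a
# slot with dim == 4 (an illustrative sketch of the assertions above).
def _check_wrapper_item_examples():
    dense_item = [0.1, 0.2, 0.3, 0.4]    # DataType.Dense: exactly `dim` floats
    index_item = 2                       # DataType.Index: one int in [0, dim)
    sparse_ids = [0, 3]                  # DataType.SparseNonValue: distinct ids in [0, dim)
    sparse_vals = [(0, 0.5), (3, 1.25)]  # DataType.SparseValue: (id, float) pairs, distinct ids
    return dense_item, index_item, sparse_ids, sparse_vals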
class CheckInputTypeWrapper(object):
def __init__(self, generator, input_types, logger):
self.generator = generator
self.input_types = input_types
self.logger = logger
def __call__(self, obj, filename):
for items in self.generator(obj, filename):
try:
# dict type is required for input_types when item is dict type
assert (isinstance(items, dict) and \
not isinstance(self.input_types, dict))==False
yield items
except AssertionError as e:
self.logger.error(
"%s type is required for input type but got %s" %
(repr(type(items)), repr(type(self.input_types))))
raise
def provider(input_types=None,
should_shuffle=None,
pool_size=-1,
min_pool_size=-1,
can_over_batch_size=True,
calc_batch_size=None,
cache=CacheType.NO_CACHE,
check=False,
check_fail_continue=False,
init_hook=None,
**outter_kwargs):
"""
Provider decorator. Use it to turn a function into a PyDataProvider2 object.
In this function, the user only needs to yield each sample from some
train/test file.
The basic usage is:
.. code-block:: python
@provider(some data provider config here...)
def process(settings, file_name):
while not at end of file_name:
sample = readOneSampleFromFile(file_name)
yield sample.
The configuration of the data provider should be set up by\:
:param input_types: Specify the input types; it can also be set in init_hook.
It could be a list of InputType objects. For example,
input_types=[dense_vector(9), integer_value(2)]. Alternatively, the
user can set a dict of InputType objects, whose keys are the
data_layers' names. For example, input_types=\
{'img': img_features, 'label': label}. When using a dict of
InputType, the user should yield a dict of feature values, whose
keys are also the data_layers' names.
:type input_types: list|tuple|dict
:param should_shuffle: True if data should be shuffled. Passing None means
shuffle during training and do not shuffle during testing.
:type should_shuffle: bool
:param pool_size: Maximum number of samples in the data pool.
:type pool_size: int
:param min_pool_size: Minimum number of samples in the data pool. PaddlePaddle
randomly picks samples from the pool, so min_pool_size
affects how well the data is randomized.
:type min_pool_size: int
:param can_over_batch_size: True if paddle can return a mini-batch larger
than the batch size in settings. It is useful when
customizing each sample's batch_size.
It is very dangerous to set it to False and use
calc_batch_size together. Default is True.
:type can_over_batch_size: bool
:param calc_batch_size: a method to calculate each sample's batch size.
By default each sample's batch size is 1, but you
can customize it.
:type calc_batch_size: callable
:param cache: Cache strategy of Data Provider. Default is CacheType.NO_CACHE
:type cache: int
:param init_hook: Initialization hook. Useful when the data provider needs to
load some external data, such as a dictionary. The parameters are
(settings, file_list, \*\*kwargs).
- settings. The global settings object. The user can set
settings.input_types here.
- file_list. All file names passed to the data provider.
- is_train. Whether this data provider is used for training or not.
- kwargs. Other keyword arguments passed from
trainer_config's args parameter.
:type init_hook: callable
:param check: Check whether the yielded data format matches input_types.
Enabling this will slow down the data providing process, but it is
very useful for debugging. Default is disabled.
:type check: bool
:param check_fail_continue: Whether to continue training when a check fails.
If True, the wrongly formatted data is simply
dropped. Has no effect when check is False.
:type check_fail_continue: bool
"""
def __wrapper__(generator):
class DataProvider(object):
def __init__(self, file_list, **kwargs):
self.logger = logging.getLogger("")
self.logger.setLevel(logging.INFO)
self.input_types = None
self.should_shuffle = should_shuffle
true_table = [1, 't', 'true', 'on']
false_table = [0, 'f', 'false', 'off']
if not isinstance(self.should_shuffle, bool) and \
self.should_shuffle is not None:
if isinstance(self.should_shuffle, basestring):
self.should_shuffle = self.should_shuffle.lower()
if self.should_shuffle in true_table:
self.should_shuffle = True
elif self.should_shuffle in false_table:
self.should_shuffle = False
else:
self.logger.warning(
"Could not recognize should_shuffle (%s); "
"using the default value of should_shuffle."
" Please set should_shuffle to a bool value or "
"something in %s" %
(repr(self.should_shuffle),
repr(true_table + false_table)))
self.should_shuffle = None
self.pool_size = pool_size
self.can_over_batch_size = can_over_batch_size
self.calc_batch_size = calc_batch_size
self.file_list = file_list
self.generator = generator
self.cache = cache
self.min_pool_size = min_pool_size
self.input_order = kwargs['input_order']
self.check = check
if init_hook is not None:
init_hook(self, file_list=file_list, **kwargs)
if 'slots' in outter_kwargs:
self.logger.warning('setting slots value is deprecated, '
'please use input_types instead.')
self.slots = outter_kwargs['slots']
if input_types is not None:
self.slots = input_types
if self.input_types is not None:
self.slots = self.input_types
assert self.slots is not None, \
"Data Provider's input_types must be set"
assert self.generator is not None
use_dynamic_order = False
if isinstance(self.slots, dict): # reorder input_types
self.slots = [self.slots[ipt] for ipt in self.input_order]
use_dynamic_order = True
if len(self.slots) == 1:
self.generator = SingleSlotWrapper(self.generator)
if use_dynamic_order:
self.generator = InputOrderWrapper(self.generator,
self.input_order)
else:
self.generator = CheckInputTypeWrapper(
self.generator, self.slots, self.logger)
if self.check:
self.generator = CheckWrapper(self.generator, self.slots,
check_fail_continue,
self.logger)
return DataProvider
return __wrapper__
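# A hedged end-to-end sketch of the decorator above, as it would appear in a
# user's data_provider.py. The tab-separated "label<TAB>pixels" file format
# and the slot names 'pixel'/'label' are illustrative only.
def _provider_usage_example():
    @provider(input_types={'pixel': dense_vector(784),
                           'label': integer_value(10)})
    def process(settings, filename):
        with open(filename) as f:
            for line in f:
                label, pixels = line.split('\t')
                yield {
                    'pixel': [float(x) for x in pixels.split()],
                    'label': int(label),
                }
    return process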
def deserialize_args(args):
"""
Internal use only.
:param args:
:return:
"""
return cPickle.loads(args)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
This diff is collapsed.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.proto.DataConfig_pb2 import DataConfig
g_config = None
def SimpleData(files=None,
feat_dim=None,
context_len=None,
buffer_capacity=None):
data_config = DataConfig()
data_config.type = 'simple'
data_config.files = files
data_config.feat_dim = feat_dim
if context_len is not None:
data_config.context_len = context_len
if buffer_capacity:
data_config.buffer_capacity = buffer_capacity
return data_config
def get_config_funcs(trainer_config):
global g_config
g_config = trainer_config
return dict(SimpleData=SimpleData)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# recurrent_units.py
# Version 2.0
#
# Some recurrent units that can be used in a recurrent layer group.
# To use these units, import this module in your config_file:
# import trainer.recurrent_units
#
# The modules in this file are DEPRECATED.
# If you would like to use lstm/gru
# please use the functions defined in paddle.trainer_config_helpers.
from paddle.trainer.config_parser import *
# Long short-term memory; can be used in a recurrent machine.
# *inputs* must be a list of Projections, for example:
#   inputs = [FullMatrixProjection("input_layer_name")],
# *para_prefix* defines parameter names; if the *para_prefix* of
# two LstmRecurrentUnits is the same, they share the same parameters.
# *out_memory* can be defined outside if it is used outside.
def LstmRecurrentUnit(name,
size,
active_type,
state_active_type,
gate_active_type,
inputs,
para_prefix=None,
error_clipping_threshold=0,
out_memory=None):
if para_prefix is None:
para_prefix = name
if out_memory is None:
out_memory = Memory(name=name, size=size)
state_memory = Memory(name=name + "_" + "state", size=size)
Layer(
name=name + "_" + "input_recurrent",
type="mixed",
size=size * 4, #(input_s, input_gate, forget_gate, output_gate)
error_clipping_threshold=error_clipping_threshold,
bias=Bias(
initial_std=0, parameter_name=para_prefix + "_input_recurrent.b"),
inputs=inputs + [
FullMatrixProjection(
out_memory, parameter_name=para_prefix + "_input_recurrent.w"),
], )
LstmStepLayer(
name=name,
size=size,
bias=Bias(parameter_name=para_prefix + "_check.b"),
inputs=[name + "_" + "input_recurrent", state_memory],
active_type=active_type,
active_gate_type=gate_active_type,
active_state_type=state_active_type, )
GetOutputLayer(
name=name + "_" + "state",
size=size,
inputs=Input(
name, input_layer_argument="state"), )
def LstmRecurrentUnitNaive(name,
size,
active_type,
state_active_type,
gate_active_type,
inputs,
para_prefix=None,
error_clipping_threshold=0,
out_memory=None):
if para_prefix is None:
para_prefix = name
if out_memory is None:
out_memory = Memory(name=name, size=size)
state_memory = Memory(name=name + "_" + "state", size=size)
Layer(
name=name + "_" + "input_recurrent",
type="mixed",
size=size * 4, #(input_s, input_gate, forget_gate, output_gate)
error_clipping_threshold=error_clipping_threshold,
bias=Bias(
initial_std=0, parameter_name=para_prefix + "_input_recurrent.b"),
inputs=inputs + [
FullMatrixProjection(
out_memory, parameter_name=para_prefix + "_input_recurrent.w"),
], )
ExpressionLayer(
name=name + "_" + "input_s",
size=size,
active_type=active_type,
inputs=[
IdentityOffsetProjection(
name + "_" + "input_recurrent", offset=0)
], )
ExpressionLayer(
name=name + "_" + "input_gate",
active_type=gate_active_type,
inputs=[
IdentityOffsetProjection(
name + "_" + "input_recurrent", offset=size), DotMulProjection(
state_memory, parameter_name=para_prefix + "_input_check.w")
], )
ExpressionLayer(
name=name + "_" + "forget_gate",
active_type=gate_active_type,
inputs=[
IdentityOffsetProjection(
name + "_" + "input_recurrent", offset=size * 2),
DotMulProjection(
state_memory, parameter_name=para_prefix + "_forget_check.w")
], )
ExpressionLayer(
name=name + "_" + "state",
inputs=[
DotMulOperator([name + "_" + "input_s", name + "_" + "input_gate"]),
DotMulOperator([state_memory, name + "_" + "forget_gate"]),
], )
ExpressionLayer(
name=name + "_" + "output_gate",
active_type=gate_active_type,
inputs=[
IdentityOffsetProjection(
name + "_" + "input_recurrent", offset=size * 3),
DotMulProjection(
name + "_" + "state",
parameter_name=para_prefix + "_output_check.w")
], )
ExpressionLayer(
name=name + "_" + "state_atv",
active_type=state_active_type,
inputs=IdentityProjection(name + "_" + "state"), )
ExpressionLayer(
name=name,
inputs=DotMulOperator(
[name + "_" + "state_atv", name + "_" + "output_gate"]), )
# Like LstmRecurrentUnit, but it is a layer group.
# It is equivalent to LstmLayer.
def LstmRecurrentLayerGroup(name,
size,
active_type,
state_active_type,
gate_active_type,
inputs,
para_prefix=None,
error_clipping_threshold=0,
seq_reversed=False):
input_layer_name = name + "_" + "transform_input"
Layer(
name=input_layer_name,
type="mixed",
size=size * 4,
active_type="",
bias=False,
inputs=inputs, )
RecurrentLayerGroupBegin(
name + "_layer_group",
in_links=[input_layer_name],
out_links=[name],
seq_reversed=seq_reversed)
LstmRecurrentUnit(
name=name,
size=size,
active_type=active_type,
state_active_type=state_active_type,
gate_active_type=gate_active_type,
inputs=[IdentityProjection(input_layer_name)],
para_prefix=para_prefix,
error_clipping_threshold=error_clipping_threshold, )
RecurrentLayerGroupEnd(name + "_layer_group")
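def _lstm_layer_group_example():
    # A hedged sketch of a typical invocation from a trainer config. The "emb"
    # input layer name and the activation-type strings are illustrative only.
    LstmRecurrentLayerGroup(
        name="lstm",
        size=256,
        active_type="tanh",
        state_active_type="tanh",
        gate_active_type="sigmoid",
        inputs=[FullMatrixProjection("emb")])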
# Gated recurrent unit; can be used in a recurrent machine.
# *inputs* should be a list of Projections, for example:
#   inputs = [FullMatrixProjection("input_layer_name")],
# *para_prefix* defines parameter names; if the *para_prefix* of
# two GatedRecurrentUnits is the same, they share the same parameters.
# *out_memory* can be defined outside if it is used outside.
def GatedRecurrentUnit(name,
size,
active_type,
gate_active_type,
inputs,
para_prefix=None,
error_clipping_threshold=0,
out_memory=None):
if type_of(inputs) == str: #only used by GatedRecurrentLayerGroup
input_layer_name = inputs
else:
input_layer_name = name + "_" + "transform_input"
Layer(
name=input_layer_name,
type="mixed",
size=size * 3,
active_type="",
bias=False,
inputs=inputs, )
if para_prefix is None:
para_prefix = name
if out_memory is None:
out_memory = Memory(name=name, size=size)
GruStepLayer(
name=name,
size=size,
bias=Bias(parameter_name=para_prefix + "_gate.b"),
inputs=[
input_layer_name, Input(
out_memory, parameter_name=para_prefix + "_gate.w")
],
active_type=active_type,
active_gate_type=gate_active_type, )
def GatedRecurrentUnitNaive(name,
size,
active_type,
gate_active_type,
inputs,
para_prefix=None,
error_clipping_threshold=0,
out_memory=None):
if type_of(inputs) == str: #only used by GatedRecurrentLayerGroup
input_layer_name = inputs
else:
input_layer_name = name + "_" + "transform_input"
Layer(
name=input_layer_name,
type="mixed",
size=size * 3,
active_type="",
bias=False,
inputs=inputs, )
if para_prefix is None:
para_prefix = name
if out_memory is None:
out_memory = Memory(name=name, size=size)
Layer(
name=name + "_" + "update_gate",
type="mixed",
size=size,
active_type=gate_active_type,
error_clipping_threshold=error_clipping_threshold,
bias=Bias(
initial_std=0, parameter_name=para_prefix + "_update_gate.b"),
inputs=[
IdentityOffsetProjection(
input_layer_name, offset=0), FullMatrixProjection(
out_memory, parameter_name=para_prefix + "_update_gate.w")
], )
Layer(
name=name + "_" + "reset_gate",
type="mixed",
size=size,
active_type=gate_active_type,
error_clipping_threshold=error_clipping_threshold,
bias=Bias(
initial_std=0, parameter_name=para_prefix + "_reset_gate.b"),
inputs=[
IdentityOffsetProjection(
input_layer_name, offset=size), FullMatrixProjection(
out_memory, parameter_name=para_prefix + "_reset_gate.w")
], )
ExpressionLayer(
name=name + "_" + "reset_output",
inputs=DotMulOperator([out_memory, name + "_" + "reset_gate"]), )
Layer(
name=name + "_" + "output_candidate",
type="mixed",
size=size,
active_type=active_type,
error_clipping_threshold=error_clipping_threshold,
bias=Bias(
initial_std=0, parameter_name=para_prefix + "_output_candidate.b"),
inputs=[
IdentityOffsetProjection(
input_layer_name, offset=size * 2), FullMatrixProjection(
name + "_" + "reset_output",
parameter_name=para_prefix + "_output_candidate.w")
], )
ExpressionLayer( #element-wise interpolation
name=name,
inputs=[
IdentityProjection(out_memory),
DotMulOperator(
[out_memory, name + "_" + "update_gate"], scale=-1.0),
DotMulOperator(
[name + "_" + "output_candidate", name + "_" + "update_gate"]),
], )
# Like GatedRecurrentUnit, but it is a layer group.
# It is equivalent to GatedRecurrentLayer.
def GatedRecurrentLayerGroup(name,
size,
active_type,
gate_active_type,
inputs,
para_prefix=None,
error_clipping_threshold=0,
seq_reversed=False):
input_layer_name = name + "_" + "transform_input"
Layer(
name=input_layer_name,
type="mixed",
size=size * 3,
active_type="",
bias=False,
inputs=inputs, )
RecurrentLayerGroupBegin(
name + "_layer_group",
in_links=[input_layer_name],
out_links=[name],
seq_reversed=seq_reversed)
GatedRecurrentUnit(
name=name,
size=size,
active_type=active_type,
gate_active_type=gate_active_type,
inputs=input_layer_name, #transform outside
para_prefix=para_prefix,
error_clipping_threshold=error_clipping_threshold, )
RecurrentLayerGroupEnd(name + "_layer_group")
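def _gru_layer_group_example():
    # A hedged sketch mirroring the LSTM example above: a typical invocation
    # of GatedRecurrentLayerGroup from a trainer config (names illustrative).
    GatedRecurrentLayerGroup(
        name="gru",
        size=256,
        active_type="tanh",
        gate_active_type="sigmoid",
        inputs=[FullMatrixProjection("emb")],
        seq_reversed=False)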
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from activations import *
from data_sources import *
from poolings import *
from evaluators import *
from layers import *
from networks import *
from optimizers import *
from attrs import *
from config_parser_utils import *
# This will enable operator overload for LayerOutput
import layer_math
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = [
"TanhActivation", "SigmoidActivation", "SoftmaxActivation",
"IdentityActivation", "LinearActivation", 'SequenceSoftmaxActivation',
'ExpActivation', "ReluActivation", "BReluActivation", "SoftReluActivation",
"STanhActivation", "AbsActivation", "SquareActivation", "BaseActivation",
"LogActivation", "SqrtActivation", "ReciprocalActivation",
"SoftSignActivation"
]
class BaseActivation(object):
"""
A marker class for activations.
Each activation inherits from BaseActivation, which has two parameters.
:param name: activation name in paddle config.
:type name: basestring
:param support_hppl: True if supported by HPPL, a library used by paddle
internally. Currently, the lstm layer can only use
activations supported by HPPL.
:type support_hppl: bool
"""
def __init__(self, name, support_hppl):
self.name = name
self.support_hppl = support_hppl
def __repr__(self):
return self.name
class TanhActivation(BaseActivation):
"""
Tanh activation.
.. math::
f(z)=tanh(z)=\\frac{e^z-e^{-z}}{e^z+e^{-z}}
"""
def __init__(self):
BaseActivation.__init__(self, 'tanh', True)
class SigmoidActivation(BaseActivation):
"""
Sigmoid activation.
.. math::
f(z) = \\frac{1}{1+exp(-z)}
"""
def __init__(self):
BaseActivation.__init__(self, 'sigmoid', True)
class SoftmaxActivation(BaseActivation):
"""
Softmax activation for a simple (non-sequence) input
.. math::
P(y=j|x) = \\frac{e^{x_j}} {\\sum^K_{k=1} e^{x_k} }
"""
def __init__(self):
BaseActivation.__init__(self, 'softmax', False)
class SequenceSoftmaxActivation(BaseActivation):
"""
Softmax activation for one sequence. The input feature must be a sequence,
and its feature dimension must be 1.
.. code:: python
result = softmax([each_feature_vector[0] for each_feature_vector in input_feature])
for i, each_time_step_output in enumerate(output):
each_time_step_output = result[i]
"""
def __init__(self):
BaseActivation.__init__(self, 'sequence_softmax', False)
class IdentityActivation(BaseActivation):
"""
Identity Activation.
It does nothing to the output in both the forward and backward passes.
"""
def __init__(self):
BaseActivation.__init__(self, '', False)
LinearActivation = IdentityActivation
class ReluActivation(BaseActivation):
"""
Relu activation.
Forward: :math:`y = max(0, z)`
Derivative:
.. math::
1 &\\quad if z > 0 \\\\
0 &\\quad \\mathrm{otherwise}
"""
def __init__(self):
BaseActivation.__init__(self, 'relu', True)
class BReluActivation(BaseActivation):
"""
BRelu Activation.
Forward: :math:`y = min(24, max(0, z))`
Derivative:
.. math::
1 &\\quad if 0 < z < 24 \\\\
0 &\\quad \\mathrm{otherwise}
"""
def __init__(self):
BaseActivation.__init__(self, 'brelu', False)
class SoftReluActivation(BaseActivation):
"""
SoftRelu Activation.
"""
def __init__(self):
BaseActivation.__init__(self, 'softrelu', False)
class STanhActivation(BaseActivation):
"""
Scaled Tanh Activation.
.. math::
f(z) = 1.7159 * tanh(2/3*z)
"""
def __init__(self):
BaseActivation.__init__(self, 'stanh', False)
class AbsActivation(BaseActivation):
"""
Abs Activation.
Forward: :math:`f(z) = abs(z)`
Derivative:
.. math::
1 &\\quad if \\quad z > 0 \\\\
-1 &\\quad if \\quad z < 0 \\\\
0 &\\quad if \\quad z = 0
"""
def __init__(self):
BaseActivation.__init__(self, 'abs', False)
class SquareActivation(BaseActivation):
"""
Square Activation.
.. math::
f(z) = z^2.
"""
def __init__(self):
BaseActivation.__init__(self, 'square', False)
class ExpActivation(BaseActivation):
"""
Exponential Activation.
.. math::
f(z) = e^z.
"""
def __init__(self):
BaseActivation.__init__(self, 'exponential', False)
class LogActivation(BaseActivation):
"""
Logarithm Activation.
.. math::
f(z) = log(z)
"""
def __init__(self):
BaseActivation.__init__(self, 'log', False)
class SqrtActivation(BaseActivation):
"""
Square Root Activation.
.. math::
f(z) = sqrt(z)
"""
def __init__(self):
BaseActivation.__init__(self, 'sqrt', False)
class ReciprocalActivation(BaseActivation):
"""
Reciprocal Activation.
.. math::
f(z)=\\frac{1}{z}
"""
def __init__(self):
BaseActivation.__init__(self, 'reciprocal', False)
class SoftSignActivation(BaseActivation):
"""
SoftSign Activation.
.. math::
f(z)=\\frac{z}{1 + |z|}
"""
def __init__(self):
BaseActivation.__init__(self, 'softsign', False)
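# The classes above are thin markers: each carries only the activation name
# resolved by the core, plus the support_hppl flag. A hedged sketch of wrapping
# one more activation name (the 'cube' name here is hypothetical):
def _custom_activation_example():
    class CubeActivation(BaseActivation):
        def __init__(self):
            BaseActivation.__init__(self, 'cube', False)
    return repr(CubeActivation())  # -> 'cube', since __repr__ returns the name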
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.config_parser import *
__all__ = [
'HookAttr', 'ParamAttr', 'ExtraAttr', 'ParameterAttribute',
'ExtraLayerAttribute'
]
def convert_and_compare(x, Type):
"""
Convert x to be the same type as Type and then convert back to
check whether there is a loss of information
:param x: object to be checked
:param Type: target type to check x over
"""
return type(x)(Type(x)) == x
def is_compatible_with(x, Type):
"""
Check if x has a type compatible with Type
:param x: object to be checked
:param Type: target type to check x over
"""
if type(x) == Type:
return True
try:
if float == Type or int == Type:
# avoid types that can be converted to float/int but are not very
# meaningful and could potentially lead to errors,
# e.g., str and bool values should not be used to initialize float/int variables
if not isinstance(x, str) and not isinstance(x, bool):
return convert_and_compare(x, Type)
elif bool == Type:
# should not use string type to initialize bool variable
if not isinstance(x, str):
return convert_and_compare(x, Type)
else:
return False
except:
return False
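def _type_compatibility_examples():
    # A few concrete cases of the round-trip check above (illustrative only).
    assert convert_and_compare(3.0, int)          # int(3.0) -> 3 -> 3.0, no loss
    assert not convert_and_compare(3.5, int)      # int(3.5) -> 3 -> 3.0 != 3.5
    assert is_compatible_with(1, float)           # an int can initialize a float attr
    assert not is_compatible_with('0.1', float)   # strings are rejected
    assert not is_compatible_with(True, float)    # bools are rejected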
class HookAttribute(object):
"""
Hook Attribute object. As a member of the ParameterAttribute class, a hook is an auxiliary operation that occurs
during the training process of a layer with parameters, such as an img_conv layer or an fc layer.
:param type: Hook type, currently supported types:
'pruning' : the user specifies a sparsity_ratio before training starts, and the
network will prune the parameters based on the sparsity_ratio.
e.g. a Hook object can be defined as hk = HookAttribute('pruning', 0.6)
and used as paddle.layer.img_conv(input=img, filter_size=3,
num_channels=3, num_filters=64,
param_attr=ParameterAttribute(update_hooks=hk))
The pruning details can be found at https://arxiv.org/pdf/1506.02626.pdf
:type type: string
:param sparsity_ratio: Must be specified if the hook type is 'pruning';
it represents the ratio of elements in the parameter to be set to zero.
:type sparsity_ratio: float or None
"""
def __init__(self, type, sparsity_ratio=None):
self.type = type
self.sparsity_ratio = sparsity_ratio
if self.sparsity_ratio is not None:
assert is_compatible_with(
self.sparsity_ratio,
float), 'sparsity_ratio must be float type'
assert self.sparsity_ratio <= 1 and self.sparsity_ratio >= 0, 'sparsity_ratio must be a float in [0, 1]'
def __call__(self):
return ParameterHook(self.type, sparsity_ratio=self.sparsity_ratio)
class ParameterAttribute(object):
"""
Parameter Attributes object. To fine-tune the network training process, the
user can set attributes to control training details, such as the l1/l2 rate,
the learning rate, and how to initialize the parameter.
NOTE: IT IS A HIGH LEVEL USER INTERFACE.
:param is_static: True if this parameter will be fixed while training.
:type is_static: bool
:param initial_std: Gaussian random initialization standard deviation.
None if not using Gaussian random initialization.
:type initial_std: float or None
:param initial_mean: Gaussian random initialization mean.
None if not using Gaussian random initialization.
:type initial_mean: float or None
:param initial_max: Uniform initialization max value.
:type initial_max: float or None
:param initial_min: Uniform initialization min value.
:type initial_min: float or None
:param l1_rate: the l1 regularization factor
:type l1_rate: float or None
:param l2_rate: the l2 regularization factor
:type l2_rate: float or None
:param learning_rate: The parameter learning rate. None means 1.
The learning rate when optimize is LEARNING_RATE =
GLOBAL_LEARNING_RATE * PARAMETER_LEARNING_RATE
* SCHEDULER_FACTOR.
:type learning_rate: float or None
:param momentum: The parameter momentum. None means use global value.
:type momentum: float or None
:param gradient_clipping_threshold: gradient clipping threshold. If the
gradient value is larger than this
threshold, it will be clipped.
:type gradient_clipping_threshold: float
:param sparse_update: Enable sparse update for this parameter. It will
enable both local and remote sparse update.
:type sparse_update: bool
:param update_hooks: A HookAttribute object.
:type update_hooks: HookAttribute
:param initializer: If not None, it should be a callable object which accepts
a parameter name and returns numpy array for the initial
value of the parameter
:type initializer: callable object
"""
def __init__(self,
name=None,
is_static=False,
initial_std=None,
initial_mean=None,
initial_max=None,
initial_min=None,
l1_rate=None,
l2_rate=None,
learning_rate=None,
momentum=None,
gradient_clipping_threshold=None,
sparse_update=False,
update_hooks=None,
initializer=None):
self.attr = {}
if is_static:
self.attr['is_static'] = True
if initial_std is None and initial_mean is None and initial_max \
is None and initial_min is None:
self.attr['initial_smart'] = True
elif is_compatible_with(initial_std, float) or \
is_compatible_with(initial_mean, float):
if initial_std is not None:
self.attr['initial_std'] = initial_std
if initial_mean is not None:
self.attr['initial_mean'] = initial_mean
self.attr['initial_strategy'] = 0 # Gauss Random
elif is_compatible_with(initial_max, float) and \
is_compatible_with(initial_min, float):
assert initial_min < initial_max
initial_mean = (initial_max + initial_min) / 2
initial_std = initial_mean - initial_min
self.attr['initial_mean'] = initial_mean
self.attr['initial_std'] = initial_std
self.attr['initial_strategy'] = 1 # Uniform Random
else:
raise RuntimeError("Unexpected branch.")
if not is_static and is_compatible_with(l1_rate, float):
self.attr['decay_rate_l1'] = l1_rate
if not is_static and is_compatible_with(l2_rate, float):
self.attr['decay_rate'] = l2_rate
if not is_static and is_compatible_with(learning_rate, float):
self.attr['learning_rate'] = learning_rate
if not is_static and is_compatible_with(momentum, float):
self.attr['momentum'] = momentum
if name is not None:
self.attr['parameter_name'] = name
if sparse_update:
self.attr['sparse_update'] = True
self.attr['sparse_remote_update'] = True
if gradient_clipping_threshold is not None and \
is_compatible_with(gradient_clipping_threshold, float):
self.attr['gradient_clipping_threshold'] = \
gradient_clipping_threshold
if initializer is not None:
self.attr['initializer'] = initializer
if update_hooks:
self.attr['update_hooks'] = update_hooks
def set_default_parameter_name(self, name):
"""
Set the default parameter name. If the parameter name is not set, the
default parameter name will be used.
:param name: default parameter name.
:type name: basestring
"""
if 'parameter_name' not in self.attr:
self.attr['parameter_name'] = name
@staticmethod
def to_bias(bias_attr):
if isinstance(bias_attr, ParameterAttribute):
return Bias(**bias_attr.attr)
else:
return False
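def _parameter_attribute_examples():
    # A hedged illustration of how the constructor above fills self.attr.
    gauss = ParameterAttribute(initial_mean=0.0, initial_std=0.02, l2_rate=8e-4)
    # gauss.attr == {'initial_mean': 0.0, 'initial_std': 0.02,
    #                'initial_strategy': 0, 'decay_rate': 0.0008}
    uniform = ParameterAttribute(initial_min=-0.1, initial_max=0.1)
    # uniform.attr == {'initial_mean': 0.0, 'initial_std': 0.1,
    #                  'initial_strategy': 1}
    return gauss.attr, uniform.attr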
class ExtraLayerAttribute(object):
"""
Some high-level layer attributes config. You can set all attributes here,
but some layers do not support all attributes. If you set an attribute on a
layer that does not support it, paddle will print an error and dump core.
:param error_clipping_threshold: Error clipping threshold.
:type error_clipping_threshold: float
:param drop_rate: Dropout rate. Dropout will create a mask on the layer output.
The dropout rate is the zero rate of this mask. For the
details of what dropout is, please refer to `JMLRdropout
<https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf
>`_.
:type drop_rate: float
:param device: device ID of the layer. device=-1 uses the CPU; device>=0 uses the GPU.
For the details of device allocation in parallel_nn, please refer to `use_case
<https://github.com/PaddlePaddle/Paddle/blob/develop/doc/v2
/howto/cmd_parameter/use_case_en.md#case-2-specify-layers-in
-different-devices>`_.
:type device: int
"""
def __init__(self,
error_clipping_threshold=None,
drop_rate=None,
device=None):
self.attr = dict()
if error_clipping_threshold is not None:
error_clipping_threshold = float(error_clipping_threshold)
if error_clipping_threshold < 0:
raise ValueError("Error clipping threshold must not be negative")
self.attr['error_clipping_threshold'] = error_clipping_threshold
if drop_rate is not None:
drop_rate = float(drop_rate)
if drop_rate < 0:
raise ValueError("Dropout rate must not be negative")
self.attr["drop_rate"] = drop_rate
if isinstance(device, int):
self.attr["device"] = device
def check(self, layer_name):
for key in self.attr:
if not hasattr(self, 'can_%s' % key) or \
not getattr(self, 'can_%s' % key):
raise NotImplementedError("Layer %s does not support %s" %
(layer_name, key))
@staticmethod
def to_kwargs(attr):
if attr is None:
return dict()
else:
return attr.attr
HookAttr = HookAttribute
ParamAttr = ParameterAttribute
ExtraAttr = ExtraLayerAttribute
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import paddle.trainer.config_parser as config_parser
from paddle.proto.TrainerConfig_pb2 import OptimizationConfig
'''
This file is a wrapper of the formal config_parser. The main idea of this file is to
separate different config logic into different functions, such as network configuration
and optimizer configuration.
'''
__all__ = [
"parse_trainer_config", "parse_network_config", "parse_optimizer_config",
"reset_parser"
]
def parse_trainer_config(trainer_conf, config_arg_str):
return config_parser.parse_config(trainer_conf, config_arg_str)
def parse_network_config(network_conf, config_arg_str=''):
config = config_parser.parse_config(network_conf, config_arg_str)
return config.model_config
def parse_optimizer_config(optimizer_conf, config_arg_str=''):
config_parser.settings = copy.deepcopy(config_parser.DEFAULT_SETTING)
optimizer_conf()
opt_config = OptimizationConfig()
for k, v in config_parser.settings.iteritems():
if v is None:
continue
opt_config.__setattr__(k, v)
return opt_config
def reset_parser():
config_parser.begin_parse()
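def _parse_network_example():
    # A hedged sketch of the wrappers above: build a small network config
    # function with trainer_config_helpers and turn it into a ModelConfig
    # proto. The layer helpers are assumed to be importable as shown.
    from paddle.trainer_config_helpers import data_layer, fc_layer, SoftmaxActivation

    def network():
        img = data_layer(name='image', size=784)
        fc_layer(input=img, size=10, act=SoftmaxActivation())

    reset_parser()
    return parse_network_config(network)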
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Data Sources are helpers to define paddle training data or testing data.
"""
from paddle.trainer.config_parser import *
from .utils import deprecated
try:
import cPickle as pickle
except ImportError:
import six.moves.cPickle as pickle
__all__ = ['define_py_data_sources2']
def define_py_data_source(file_list,
cls,
module,
obj,
args=None,
async=False,
data_cls=PyData):
"""
Define a python data source.
For example, the simplest usage in trainer_config.py is as follows:
.. code-block:: python
define_py_data_source("train.list", TrainData, "data_provider", "process")
Or, if you want to pass arguments from trainer_config to data_provider.py, then
.. code-block:: python
define_py_data_source("train.list", TrainData, "data_provider", "process",
args={"dictionary": dict_name})
:param data_cls:
:param file_list: file list name, which contains all data file paths
:type file_list: basestring
:param cls: Train or Test Class.
:type cls: TrainData or TestData
:param module: python module name.
:type module: basestring
:param obj: python object name. May be a function name if using
PyDataProviderWrapper.
:type obj: basestring
:param args: The best practice is using dict to pass arguments into
DataProvider, and use :code:`@init_hook_wrapper` to
receive arguments.
:type args: string or picklable object
:param async: Load Data asynchronously or not.
:type async: bool
:return: None
:rtype: None
"""
if isinstance(file_list, list):
file_list_name = 'train.list'
if cls == TestData:
file_list_name = 'test.list'
with open(file_list_name, 'w') as f:
f.writelines(file_list)
file_list = file_list_name
if not isinstance(args, basestring) and args is not None:
args = pickle.dumps(args, 0)
cls(
data_cls(
files=file_list,
load_data_module=module,
load_data_object=obj,
load_data_args=args,
async_load_data=async))
def define_py_data_sources(train_list,
test_list,
module,
obj,
args=None,
train_async=False,
data_cls=PyData):
"""
The annotation is almost the same as define_py_data_sources2, except that
it can specify train_async and data_cls.
:param data_cls:
:param train_list: Train list name.
:type train_list: basestring
:param test_list: Test list name.
:type test_list: basestring
:param module: python module name. If train and test are different, then
pass a tuple or list to this argument.
:type module: basestring or tuple or list
:param obj: python object name. May be a function name if using
PyDataProviderWrapper. If train and test are different, then pass
a tuple or list to this argument.
:type obj: basestring or tuple or list
:param args: The best practice is using dict() to pass arguments into
DataProvider, and use :code:`@init_hook_wrapper` to receive
arguments. If train and test are different, then pass a tuple
or list to this argument.
:type args: string or picklable object or list or tuple.
:param train_async: Whether the training data is loaded asynchronously or not.
:type train_async: bool
:return: None
:rtype: None
"""
def __is_splitable__(o):
return (isinstance(o, list) or
isinstance(o, tuple)) and hasattr(o, '__len__') and len(o) == 2
assert train_list is not None or test_list is not None
assert module is not None and obj is not None
test_module = module
train_module = module
if __is_splitable__(module):
train_module, test_module = module
test_obj = obj
train_obj = obj
if __is_splitable__(obj):
train_obj, test_obj = obj
if args is None:
args = ""
train_args = args
test_args = args
if __is_splitable__(args):
train_args, test_args = args
if train_list is not None:
define_py_data_source(train_list, TrainData, train_module, train_obj,
train_args, train_async, data_cls)
if test_list is not None:
define_py_data_source(test_list, TestData, test_module, test_obj,
test_args, False, data_cls)
def define_py_data_sources2(train_list, test_list, module, obj, args=None):
"""
Define python Train/Test data sources in one method. If train/test use
the same Data Provider configuration, module/obj/args each contain one
argument; otherwise they contain a list or tuple of arguments. For example\:
.. code-block:: python
define_py_data_sources2(train_list="train.list",
test_list="test.list",
module="data_provider"
# if train/test use different configurations,
# obj=["process_train", "process_test"]
obj="process",
args={"dictionary": dict_name})
The related data provider can refer to :ref:`api_pydataprovider2_sequential_model` .
:param train_list: Train list name.
:type train_list: basestring
:param test_list: Test list name.
:type test_list: basestring
:param module: python module name. If train and test are different, then
pass a tuple or list to this argument.
:type module: basestring or tuple or list
:param obj: python object name. May be a function name if using
PyDataProviderWrapper. If train and test are different, then pass
a tuple or list to this argument.
:type obj: basestring or tuple or list
:param args: The best practice is using dict() to pass arguments into
DataProvider, and use :code:`@init_hook_wrapper` to receive
arguments. If train and test are different, then pass a tuple
or list to this argument.
:type args: string or picklable object or list or tuple.
:return: None
:rtype: None
"""
def py_data2(files, load_data_module, load_data_object, load_data_args,
**kwargs):
data = create_data_config_proto()
data.type = 'py2'
data.files = files
data.load_data_module = load_data_module
data.load_data_object = load_data_object
data.load_data_args = load_data_args
data.async_load_data = False
return data
define_py_data_sources(
train_list=train_list,
test_list=test_list,
module=module,
obj=obj,
args=args,
data_cls=py_data2)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
import inspect
from .attrs import ParamAttr
from .activations import TanhActivation
from paddle.trainer.config_parser import *
__all__ = [
'wrap_name_default', 'wrap_param_attr_default', 'wrap_bias_attr_default',
'wrap_act_default', 'wrap_param_default'
]
def __default_not_set_callback__(kwargs, name):
return name not in kwargs or kwargs[name] is None
def wrap_param_default(param_names=None,
default_factory=None,
not_set_callback=__default_not_set_callback__):
assert param_names is not None
assert isinstance(param_names, list) or isinstance(param_names, tuple)
for each_param_name in param_names:
assert isinstance(each_param_name, basestring)
def __impl__(func):
@functools.wraps(func)
def __wrapper__(*args, **kwargs):
if len(args) != 0:
argspec = inspect.getargspec(func)
num_positional = len(argspec.args)
if argspec.defaults:
num_positional -= len(argspec.defaults)
if not argspec.varargs and len(args) > num_positional:
logger.fatal(
"Must use keyword arguments for non-positional args")
for name in param_names:
if not_set_callback(kwargs, name): # Not set
kwargs[name] = default_factory(func)
return func(*args, **kwargs)
if hasattr(func, 'argspec'):
__wrapper__.argspec = func.argspec
else:
__wrapper__.argspec = inspect.getargspec(func)
return __wrapper__
return __impl__
class DefaultNameFactory(object):
def __init__(self, name_prefix):
self.__counter__ = 0
self.__name_prefix__ = name_prefix
def __call__(self, func):
if self.__name_prefix__ is None:
self.__name_prefix__ = func.__name__
tmp = "__%s_%d__" % (self.__name_prefix__, self.__counter__)
self.__check_name__(tmp)
self.__counter__ += 1
return tmp
def __check_name__(self, nm):
"""
@TODO(yuyang18): Implement it!
@param nm:
@return:
"""
pass
def reset(self):
self.__counter__ = 0
_name_factories = []
def reset_hook():
for factory in _name_factories:
factory.reset()
register_parse_config_hook(reset_hook)
def wrap_name_default(name_prefix=None, name_param="name"):
"""
Decorator to set "name" arguments default to "{name_prefix}_{invoke_count}".
.. code:: python
@wrap_name_default("some_name")
def func(name=None):
print name # name will never be None. If name is not set,
# name will be "some_name_%d"
:param name_prefix: name prefix. wrapped function's __name__ if None.
:type name_prefix: basestring
:return: a decorator to set default name
:rtype: callable
"""
factory = DefaultNameFactory(name_prefix)
_name_factories.append(factory)
return wrap_param_default([name_param], factory)
def wrap_param_attr_default(param_names=None, default_factory=None):
"""
Setting Default Parameter Attributes Decorator.
:param default_factory:
:param param_names: Parameter Attribute's Names, list of string
:type param_names: list
:return: decorator
"""
if param_names is None:
param_names = ['param_attr']
if default_factory is None:
default_factory = lambda _: ParamAttr()
return wrap_param_default(param_names, default_factory)
def wrap_bias_attr_default(param_names=None,
default_factory=None,
has_bias=True):
if param_names is None:
param_names = ['bias_attr']
if default_factory is None:
default_factory = lambda _: ParamAttr(initial_std=0., initial_mean=0.)
def __bias_attr_not_set__(kwargs, name):
if has_bias:
return name not in kwargs or kwargs[name] is None or \
kwargs[name] == True
else:
return name in kwargs and kwargs[name] == True
return wrap_param_default(param_names, default_factory,
__bias_attr_not_set__)
def wrap_act_default(param_names=None, act=None):
if param_names is None:
param_names = ["act"]
if act is None:
act = TanhActivation()
return wrap_param_default(param_names, lambda _: act)
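def _default_decorator_example():
    # A hedged sketch of how the wrappers above compose: each call gets an
    # auto-generated name, a default ParamAttr and a default TanhActivation
    # unless the caller overrides them. my_op is illustrative only.
    @wrap_name_default("my_op")
    @wrap_param_attr_default()
    @wrap_act_default()
    def my_op(input, size, name=None, param_attr=None, act=None):
        return name, type(param_attr).__name__, type(act).__name__

    # On the first call this returns
    # ('__my_op_0__', 'ParameterAttribute', 'TanhActivation').
    return my_op(input=None, size=8)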
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .layers import LayerOutput, mixed_layer, identity_projection, \
slope_intercept_layer, scaling_layer, repeat_layer
from .attrs import is_compatible_with
from .default_decorators import *
import activations as act
from paddle.trainer.config_parser import logger
__all__ = []
def register_unary_math_op(op_name, act):
def op(input, name=None):
return mixed_layer(
input=[identity_projection(input=input)], name=name, act=act)
op = wrap_name_default(op_name)(op)
op.__doc__ = type(act).__doc__
globals()[op_name] = op
__all__.append(op_name)
register_unary_math_op('exp', act.ExpActivation())
register_unary_math_op('log', act.LogActivation())
register_unary_math_op('abs', act.AbsActivation())
register_unary_math_op('sigmoid', act.SigmoidActivation())
register_unary_math_op('tanh', act.TanhActivation())
register_unary_math_op('square', act.SquareActivation())
register_unary_math_op('relu', act.ReluActivation())
register_unary_math_op('sqrt', act.SqrtActivation())
register_unary_math_op('reciprocal', act.ReciprocalActivation())
def add(layeroutput, other):
if is_compatible_with(other, float):
return slope_intercept_layer(input=layeroutput, intercept=other)
if not isinstance(other, LayerOutput):
logger.fatal("LayerOutput can only be added with"
" another LayerOutput or a number")
if layeroutput.size == other.size:
return mixed_layer(input=[
identity_projection(input=layeroutput),
identity_projection(input=other)
])
if other.size != 1 and layeroutput.size != 1:
logger.fatal("Two LayerOutput can be added only if they have equal size"
" or one of their sizes is 1. sizes are %s and %s" %
(layeroutput.size, other.size))
elif layeroutput.size == 1:
tmp = layeroutput
layeroutput = other
other = tmp
other = repeat_layer(other, layeroutput.size)
return mixed_layer(input=[
identity_projection(input=layeroutput), identity_projection(input=other)
])
LayerOutput.__radd__ = add
LayerOutput.__add__ = add
def sub(layeroutput, other):
if is_compatible_with(other, float):
return slope_intercept_layer(input=layeroutput, intercept=-other)
if not isinstance(other, LayerOutput):
logger.fatal("LayerOutput can only be subtracted with"
" another LayerOutput or a number")
neg = slope_intercept_layer(input=other, slope=-1.0)
return add(layeroutput, neg)
LayerOutput.__sub__ = sub
def rsub(layeroutput, other):
neg = slope_intercept_layer(input=layeroutput, slope=-1.0)
return add(neg, other)
LayerOutput.__rsub__ = rsub
def mul(layeroutput, other):
if is_compatible_with(other, float):
return slope_intercept_layer(input=layeroutput, slope=other)
if not isinstance(other, LayerOutput):
logger.fatal("LayerOutput can only be multiplied with"
" another LayerOutput or a number")
elif layeroutput.size == 1:
return scaling_layer(input=other, weight=layeroutput)
elif other.size == 1:
return scaling_layer(input=layeroutput, weight=other)
else:
logger.fatal("At least one of the operand of '*' must be a number"
" or a LayerOutput with size=1")
LayerOutput.__mul__ = mul
LayerOutput.__rmul__ = mul
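def _layer_math_example():
    # A hedged sketch of the operator overloads above, as used inside a network
    # config (the data layer names and sizes are illustrative only).
    from paddle.trainer_config_helpers import data_layer

    a = data_layer(name='a', size=128)
    b = data_layer(name='b', size=128)
    scaled = 0.5 * a + 1.0   # two slope_intercept_layers: slope=0.5, then intercept=1.0
    summed = a + b           # mixed_layer over two identity projections
    return scaled, summed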
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.config_parser import Settings, default_decay_rate, \
default_gradient_clipping_threshold, default_momentum
from .default_decorators import wrap_param_default
__all__ = [
'Optimizer', 'BaseSGDOptimizer', 'MomentumOptimizer', 'AdamaxOptimizer',
'AdamOptimizer', 'AdaGradOptimizer', 'RMSPropOptimizer',
'DecayedAdaGradOptimizer', 'AdaDeltaOptimizer', 'BaseRegularization',
'L2Regularization', 'settings', 'ModelAverage'
]
class Optimizer(object):
def to_setting_kwargs(self):
raise NotImplementedError()
def extra_settings(self):
pass
@property
def is_support_sparse(self):
return True
class BaseSGDOptimizer(Optimizer):
"""
SGD Optimizer.
    SGD is an optimization method that tries to find the neural network
    parameters which minimize its "cost/error" by iteration. In Paddle's
    implementation, the SGD optimizer is synchronized: all gradients are
    computed and reduced into one gradient, and only then is the optimization
    step applied.
    The neural network formulates learning as the problem of minimizing an
    objective function that has the form of a sum
    .. math::
        Q(w) = \\sum_{i}^{n} Q_i(w)
    The value of the function Q is usually the cost of the neural network
    (for example, the mean squared error between prediction and label). The
    function Q is parametrised by w, the weights/biases of the neural
    network, which are what is to be learned; i indexes the i-th observation
    in the (training) data.
    So, the SGD method optimizes the weights by
    .. math::
        w = w - \\eta \\nabla Q(w) = w - \\eta \\sum_{i}^{n} \\nabla Q_i(w)
    where :math:`\\eta` is the learning rate and :math:`n` is the batch size.
"""
def to_setting_kwargs(self):
raise NotImplementedError()
class MomentumOptimizer(BaseSGDOptimizer):
"""
MomentumOptimizer.
    When sparse=True, the update scheme is:
.. math::
\\alpha_t &= \\alpha_{t-1} / k \\\\
\\beta_t &= \\beta_{t-1} / (1 + \\lambda \\gamma_t) \\\\
u_t &= u_{t-1} - \\alpha_t \\gamma_t g_t \\\\
v_t &= v_{t-1} + \\tau_{t-1} \\alpha_t \\gamma_t g_t \\\\
\\tau_t &= \\tau_{t-1} + \\beta_t / \\alpha_t
where :math:`k` is momentum, :math:`\\lambda` is decay rate,
:math:`\\gamma_t` is learning rate at the t'th step.
    :param momentum: the momentum factor.
    :type momentum: float
    :param sparse: whether to use the sparse momentum update scheme.
    :type sparse: bool
"""
def extra_settings(self):
default_momentum(self.momentum)
def to_setting_kwargs(self):
if self.sparse:
return {'learning_method': 'sparse_momentum'}
else:
return {'learning_method': 'momentum'}
def __init__(self, momentum=None, sparse=False):
self.momentum = momentum
self.sparse = sparse
class AdamOptimizer(BaseSGDOptimizer):
"""
Adam optimizer.
    For details, please refer to `Adam: A Method for Stochastic Optimization
<https://arxiv.org/abs/1412.6980>`_
.. math::
m(w, t) & = \\beta_1 m(w, t-1) + (1 - \\beta_1) \\nabla Q_i(w) \\\\
v(w, t) & = \\beta_2 v(w, t-1) + (1 - \\beta_2)(\\nabla Q_i(w)) ^2 \\\\
w & = w - \\frac{\\eta m(w, t)}{\\sqrt{v(w,t) + \\epsilon}}
:param beta1: the :math:`\\beta_1` in equation.
:type beta1: float
:param beta2: the :math:`\\beta_2` in equation.
:type beta2: float
    :param epsilon: the :math:`\\epsilon` in the equation. It is used to
                    prevent division by zero.
:type epsilon: float
"""
@property
def is_support_sparse(self):
return False
def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8):
self.beta1 = beta1
self.beta2 = beta2
self.epsilon = epsilon
def to_setting_kwargs(self):
return {
'learning_method': 'adam',
'adam_beta1': self.beta1,
'adam_beta2': self.beta2,
'adam_epsilon': self.epsilon
}
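# A minimal sketch of how an optimizer is flattened into the keyword
# arguments consumed by the config parser's Settings() (values shown are the
# constructor defaults above):
#
#     opt = AdamOptimizer(beta1=0.9, beta2=0.999, epsilon=1e-8)
#     opt.to_setting_kwargs()
#     # -> {'learning_method': 'adam', 'adam_beta1': 0.9,
#     #     'adam_beta2': 0.999, 'adam_epsilon': 1e-08}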
class AdamaxOptimizer(BaseSGDOptimizer):
"""
Adamax optimizer.
    For details, please refer to `Adam: A Method for Stochastic Optimization
<https://arxiv.org/abs/1412.6980>`_
.. math::
m_t & = \\beta_1 * m_{t-1} + (1-\\beta_1)* \\nabla Q_i(w) \\\\
u_t & = max(\\beta_2*u_{t-1}, abs(\\nabla Q_i(w))) \\\\
w_t & = w_{t-1} - (\\eta/(1-\\beta_1^t))*m_t/u_t
:param beta1: the :math:`\\beta_1` in the equation.
:type beta1: float
:param beta2: the :math:`\\beta_2` in the equation.
:type beta2: float
"""
def __init__(self, beta1, beta2):
self.beta1 = beta1
self.beta2 = beta2
def to_setting_kwargs(self):
return {
'learning_method': 'adamax',
'adam_beta1': self.beta1,
'adam_beta2': self.beta2
}
@property
def is_support_sparse(self):
return False
class AdaGradOptimizer(BaseSGDOptimizer):
"""
    AdaGrad (ADAptive GRAdient algorithm) optimizer.
    For details, please refer to `Adaptive Subgradient Methods for
Online Learning and Stochastic Optimization
<http://www.magicbroom.info/Papers/DuchiHaSi10.pdf>`_.
.. math::
G &= \\sum_{\\tau=1}^{t} g_{\\tau} g_{\\tau}^T \\\\
w & = w - \\eta diag(G)^{-\\frac{1}{2}} \\circ g
"""
def to_setting_kwargs(self):
return {'learning_method': 'adagrad'}
def __init__(self):
pass
class RMSPropOptimizer(BaseSGDOptimizer):
"""
    RMSProp (Root Mean Square Propagation) optimizer. For details, please
    refer to this `slide <http://www.cs.toronto.edu/~tijmen/csc321/slides/
    lecture_slides_lec6.pdf>`_.
    The equations of this method are as follows:
.. math::
v(w, t) & = \\rho v(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\\\
w & = w - \\frac{\\eta} {\\sqrt{v(w,t) + \\epsilon}} \\nabla Q_{i}(w)
:param rho: the :math:`\\rho` in the equation. The forgetting factor.
:type rho: float
:param epsilon: the :math:`\\epsilon` in the equation.
:type epsilon: float
"""
def to_setting_kwargs(self):
return {
'learning_method': 'rmsprop',
'ada_rou': self.rho,
'ada_epsilon': self.epsilon
}
def __init__(self, rho=0.95, epsilon=1e-6):
self.rho = rho
self.epsilon = epsilon
class DecayedAdaGradOptimizer(BaseSGDOptimizer):
"""
    AdaGrad method with a decayed sum of gradients. The equations of this
    method are as follows.
    .. math::
        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\
        learning\\_rate &= 1/sqrt( E(g_t^2) + \\epsilon )
:param rho: The :math:`\\rho` parameter in that equation
:type rho: float
:param epsilon: The :math:`\\epsilon` parameter in that equation.
:type epsilon: float
"""
def to_setting_kwargs(self):
return {
'learning_method': 'decayed_adagrad',
'ada_rou': self.rho,
'ada_epsilon': self.epsilon
}
def __init__(self, rho=0.95, epsilon=1e-6):
self.rho = rho
self.epsilon = epsilon
class AdaDeltaOptimizer(BaseSGDOptimizer):
"""
    AdaDelta method. For details of AdaDelta, please refer to
`ADADELTA: AN ADAPTIVE LEARNING RATE METHOD
<http://www.matthewzeiler.com/pubs/googleTR2012/googleTR2012.pdf>`_.
.. math::
E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\
learning\\_rate &= sqrt( ( E(dx_{t-1}^2) + \\epsilon ) / ( \\
E(g_t^2) + \\epsilon ) ) \\\\
E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2
    :param rho: :math:`\\rho` in the equation.
    :type rho: float
    :param epsilon: :math:`\\epsilon` in the equation.
    :type epsilon: float
"""
def to_setting_kwargs(self):
return {
'learning_method': 'adadelta',
'ada_rou': self.rho,
'ada_epsilon': self.epsilon
}
def __init__(self, rho=0.95, epsilon=1e-6):
self.rho = rho
self.epsilon = epsilon
class BaseRegularization(Optimizer):
def __init__(self):
self.algorithm = ""
self.learning_method = ""
def to_setting_kwargs(self):
return {}
class L2Regularization(BaseRegularization):
def __init__(self, rate):
super(L2Regularization, self).__init__()
self.decay_rate = rate
def to_setting_kwargs(self):
if self.algorithm == 'owlqn':
return {'l2weight': self.decay_rate}
else:
return dict()
def extra_settings(self):
if self.algorithm == 'sgd' or self.algorithm == 'async_sgd':
default_decay_rate(self.decay_rate)
class ModelAverage(Optimizer):
def to_setting_kwargs(self):
return {
'average_window': self.average_window,
'max_average_window': self.max_average_window,
'do_average_in_cpu': self.do_average_in_cpu
}
def __init__(self,
average_window,
max_average_window=None,
do_average_in_cpu=False):
self.average_window = average_window
self.max_average_window = max_average_window
self.do_average_in_cpu = do_average_in_cpu
class GradientClippingThreshold(Optimizer):
def extra_settings(self):
default_gradient_clipping_threshold(self.threshold)
def __init__(self, threshold):
self.threshold = threshold
def to_setting_kwargs(self):
return dict()
def __extends__(dict1, dict2):
for key in dict2:
assert key not in dict1
dict1[key] = dict2[key]
return dict1
@wrap_param_default(
['learning_method'], default_factory=lambda _: MomentumOptimizer())
@wrap_param_default(
['regularization'], default_factory=lambda _: BaseRegularization())
def settings(batch_size,
learning_rate=1e-3,
learning_rate_decay_a=0.,
learning_rate_decay_b=0.,
learning_rate_schedule='poly',
learning_rate_args='',
async_lagged_grad_discard_ratio=1.5,
learning_method=None,
regularization=None,
is_async=False,
model_average=None,
gradient_clipping_threshold=None):
"""
Set the optimization method, learning rate, batch size, and other training
settings. The currently supported algorithms are SGD and Async-SGD.
.. warning::
        Note that the 'batch_size' in PaddlePaddle is not equal to the global
        training batch size. It is the batch size used by a single training
        process. If you use N processes to train one model, for example three
        GPU machines, the global batch size is N * 'batch_size'.
:param batch_size: batch size for one training process.
:type batch_size: int
:param learning_rate: learning rate for SGD
:type learning_rate: float
    :param learning_method: The optimization algorithm extending gradient
                            descent, such as momentum, adagrad, rmsprop, etc.
                            Note that it should be an instance of a subclass
                            of BaseSGDOptimizer.
:type learning_method: BaseSGDOptimizer
:param regularization: The regularization method.
:type regularization: BaseRegularization
:param is_async: Is Async-SGD or not. Default value is False.
:type is_async: bool
:param model_average: Model Average Settings.
:type model_average: ModelAverage
    :param gradient_clipping_threshold: gradient clipping threshold. Gradient
                                        values larger than this threshold will
                                        be clipped.
    :type gradient_clipping_threshold: float
    :param async_lagged_grad_discard_ratio: async SGD gradient commit control;
          when more than async_lagged_grad_discard_ratio * num_gradient_servers
          commits have passed, the current async SGD gradient is discarded.
:type async_lagged_grad_discard_ratio: float
"""
if isinstance(regularization, BaseRegularization):
regularization = [regularization]
assert isinstance(learning_method, Optimizer)
if isinstance(learning_method, BaseSGDOptimizer):
algorithm = 'async_sgd' if is_async else 'sgd'
else:
algorithm = 'owlqn'
args = [
'batch_size', 'learning_rate', 'learning_rate_decay_a',
'learning_rate_decay_b', 'learning_rate_schedule', 'learning_rate_args',
'gradient_clipping_threshold', 'async_lagged_grad_discard_ratio'
]
kwargs = dict()
kwargs['algorithm'] = algorithm
for arg in args:
kwargs[arg] = locals()[arg]
kwargs = __extends__(kwargs, learning_method.to_setting_kwargs())
learning_method.extra_settings()
for regular in regularization:
assert isinstance(regular, BaseRegularization)
regular.algorithm = algorithm
regular.learning_method = kwargs['learning_method']
kwargs = __extends__(kwargs, regular.to_setting_kwargs())
regular.extra_settings()
if gradient_clipping_threshold is not None:
gradient_clipping_threshold = GradientClippingThreshold(
threshold=gradient_clipping_threshold)
for each in [model_average, gradient_clipping_threshold]:
if each is not None:
assert isinstance(each, Optimizer)
each.algorithm = algorithm
each.learning_method = kwargs['learning_method']
kwargs = __extends__(kwargs, each.to_setting_kwargs())
each.extra_settings()
# Do Check?
Settings(**kwargs)
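# A minimal sketch of a trainer config calling settings(). If learning_method
# or regularization is omitted, the decorators above default them to
# MomentumOptimizer() and BaseRegularization(); the values below are
# illustrative.
#
#     from paddle.trainer_config_helpers import *
#
#     settings(
#         batch_size=128,
#         learning_rate=1e-3,
#         learning_method=AdamOptimizer(beta1=0.9, beta2=0.999, epsilon=1e-8),
#         regularization=L2Regularization(rate=8e-4),
#         model_average=ModelAverage(average_window=0.5),
#         gradient_clipping_threshold=25)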
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
"""
__all__ = [
"BasePoolingType", "MaxPooling", "AvgPooling", "MaxWithMaskPooling",
"CudnnMaxPooling", "CudnnAvgPooling", "CudnnAvgInclPadPooling",
"SumPooling", "SquareRootNPooling"
]
class BasePoolingType(object):
"""
Base Pooling Type.
Note these pooling types are used for sequence input, not for images.
Each PoolingType contains one parameter:
:param name: pooling layer type name used by paddle.
:type name: basestring
"""
def __init__(self, name):
self.name = name
class MaxPooling(BasePoolingType):
"""
Max pooling.
    Return the maximum value of each dimension over the sequence or time steps.
.. math::
max(samples\\_of\\_a\\_sequence)
    :param output_max_index: True to output the index of the max value within
                             the sequence instead of the max value itself.
                             None means use the default value in the proto.
:type output_max_index: bool|None
"""
def __init__(self, output_max_index=None):
BasePoolingType.__init__(self, "max")
self.output_max_index = output_max_index
class MaxWithMaskPooling(BasePoolingType):
"""
MaxWithMask pooling.
    Return not only the maximum value of each dimension over the sequence or
    time steps, but also the location indices of the maximum values found.
"""
def __init__(self):
BasePoolingType.__init__(self, "max-pool-with-mask")
class CudnnMaxPooling(BasePoolingType):
"""
    Cudnn max pooling, which only supports GPU. Return the maximum value in
    the pooling window.
"""
def __init__(self):
BasePoolingType.__init__(self, "cudnn-max-pool")
class CudnnAvgPooling(BasePoolingType):
"""
    Cudnn average pooling, which only supports GPU. Return the average value
    in the pooling window.
"""
def __init__(self):
BasePoolingType.__init__(self, "cudnn-avg-pool")
class CudnnAvgInclPadPooling(BasePoolingType):
"""
    Cudnn average pooling, which only supports GPU. Return the average value
    in the pooling window, taking the padding cells into account.
"""
def __init__(self):
BasePoolingType.__init__(self, "cudnn-avg-incl-pad-pool")
class AvgPooling(BasePoolingType):
"""
Average pooling.
    Return the average value of each dimension over the sequence or time steps.
.. math::
sum(samples\\_of\\_a\\_sequence)/sample\\_num
"""
STRATEGY_AVG = "average"
STRATEGY_SUM = "sum"
STRATEGY_SQROOTN = "squarerootn"
def __init__(self, strategy=STRATEGY_AVG):
BasePoolingType.__init__(self, "average")
self.strategy = strategy
class SumPooling(AvgPooling):
"""
Sum pooling.
    Return the sum of each dimension over the sequence or time steps.
.. math::
sum(samples\\_of\\_a\\_sequence)
"""
def __init__(self):
AvgPooling.__init__(self, AvgPooling.STRATEGY_SUM)
class SquareRootNPooling(AvgPooling):
"""
Square Root Pooling.
    Return the sum of each dimension over the sequence, divided by the square
    root of the sequence length.
.. math::
sum(samples\\_of\\_a\\_sequence)/sqrt(sample\\_num)
"""
def __init__(self):
AvgPooling.__init__(self, AvgPooling.STRATEGY_SQROOTN)
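# A minimal sketch of how these pooling types are used: they are passed to the
# sequence pooling layer in trainer_config_helpers (see the
# test_sequence_pooling config listed below). Layer names are illustrative.
#
#     from paddle.trainer_config_helpers import *
#
#     settings(batch_size=1000, learning_rate=1e-4)
#
#     seq = data_layer(name='seq_in', size=128)
#     max_pooled = pooling_layer(input=seq, pooling_type=MaxPooling())
#     avg_pooled = pooling_layer(input=seq, pooling_type=AvgPooling())
#     outputs(max_pooled, avg_pooled)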
#################### test_config_parser #########################
add_test(NAME layers_test
COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_BINARY_DIR}/python/
${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/layers_test.py
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/python/paddle)
add_test(NAME test_reset_hook
COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_BINARY_DIR}/python/
${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/test_reset_hook.py
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/python/paddle)
add_paddle_exe(protobuf_equal ProtobufEqualMain.cpp)
add_test(NAME test_layerHelpers
COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_BINARY_DIR}/python/
${PADDLE_BINARY_DIR}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE}
${CMAKE_CURRENT_BINARY_DIR}/protobuf_equal
)
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <google/protobuf/text_format.h>
#include <google/protobuf/util/message_differencer.h>
#include <fstream>
#include <iostream>
#include "TrainerConfig.pb.h"
bool loadPb(google::protobuf::Message* conf, const std::string& filename) {
std::ifstream fin;
fin.open(filename.c_str());
if (fin.is_open()) {
std::string str((std::istreambuf_iterator<char>(fin)),
std::istreambuf_iterator<char>());
bool ok = google::protobuf::TextFormat::ParseFromString(str, conf);
fin.close();
return ok;
} else {
return false;
}
}
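// Compares two text-format protobuf config files for approximate equality.
// With two file arguments they are parsed as paddle::ModelConfig; with an
// extra third argument they are parsed as paddle::TrainerConfig.
// Exit codes: 0 = equal, 1 = bad argument count, 2/3 = parse failure of the
// first/second file, 4 = configs differ.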
int main(int argc, char** argv) {
std::unique_ptr<google::protobuf::Message> config1;
std::unique_ptr<google::protobuf::Message> config2;
if (argc == 3) {
config1.reset(new paddle::ModelConfig());
config2.reset(new paddle::ModelConfig());
} else if (argc == 4) {
config1.reset(new paddle::TrainerConfig());
config2.reset(new paddle::TrainerConfig());
}
if (!config1 || !config2) {
return 1;
} else if (!loadPb(config1.get(), argv[1])) {
return 2;
} else if (!loadPb(config2.get(), argv[2])) {
return 3;
} else {
if (google::protobuf::util::MessageDifferencer::ApproximatelyEquals(
*config1, *config2)) {
return 0;
} else {
return 4;
}
}
}
#!/bin/bash
export configs=(test_repeat_layer test_fc layer_activations projections test_print_layer
test_sequence_pooling test_lstmemory_layer test_grumemory_layer
last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
test_kmax_seq_socre_layer test_sub_nested_seq_select_layer test_scale_shift_layer
test_seq_slice_layer test_cross_entropy_over_beam test_roi_pool_layer test_pooling3D_layer
test_conv3d_layer test_deconv3d_layer test_BatchNorm3D test_resize_layer
test_scale_sub_region_layer test_dot_prod_layer test_l2_distance_layer
test_factorization_machine)
export whole_configs=(test_split_datasource)
#!/bin/bash
set -e
cd `dirname $0`
protostr=$PWD/protostr
. file_list.sh
for conf in ${configs[*]}
do
echo "Generating " $conf
$1 -m paddle.utils.dump_config $conf.py > $protostr/$conf.protostr.unittest
if [ ! -f "$protostr/$conf.protostr" ]; then
cp $protostr/$conf.protostr.unittest $protostr/$conf.protostr
fi
cat ${conf}.py |$1 test_config_parser_for_non_file_config.py > $protostr/$conf.protostr.non_file_config.unittest
done
for conf in ${whole_configs[*]}
do
echo "Generating " $conf
$1 -m paddle.utils.dump_config $conf.py "" --whole > $protostr/$conf.protostr.unittest
if [ ! -f "$protostr/$conf.protostr" ]; then
cp $protostr/$conf.protostr.unittest $protostr/$conf.protostr
fi
cat ${conf}.py |$1 test_config_parser_for_non_file_config.py --whole > $protostr/$conf.protostr.non_file_config.unittest
done
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(learning_rate=1e-3, batch_size=1000)
img = data_layer(name='image', size=256 * 256)
# parse_conv in config_parser.py is not strictly accurate when filter_size
# is not square, so a square filter_size is set here.
img_conv = img_conv_layer(
input=img,
num_channels=1,
num_filters=64,
filter_size=(32, 32),
padding=(1, 1),
dilation=(1, 1),
stride=(1, 1),
act=LinearActivation())
img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())
img_norm = img_cmrnorm_layer(input=img_bn, size=32)
img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling())
outputs(img_pool, img_norm)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(learning_rate=1e-3, batch_size=1000)
img = data_layer(name='image', size=227 * 227)
# parse_conv in config_parser.py is not strictly accurate when filter_size
# is not square, so a square filter_size is set here.
img_conv = img_conv_layer(
input=img,
num_channels=1,
num_filters=64,
filter_size=(32, 32),
padding=(1, 1),
stride=(1, 1),
act=LinearActivation(),
trans=True)
img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())
img_norm = img_cmrnorm_layer(input=img_bn, size=32)
img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling())
outputs(img_pool, img_norm)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-5)
din = data_layer(name='data', size=30)
seq_op = [first_seq, last_seq]
agg_level = [AggregateLevel.TO_SEQUENCE, AggregateLevel.TO_NO_SEQUENCE]
opts = []
for op in seq_op:
for al in agg_level:
opts.append(op(input=din, agg_level=al))
for op in seq_op:
opts.append(
op(input=din, agg_level=AggregateLevel.TO_NO_SEQUENCE, stride=5))
outputs(opts)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
Test all activations.
'''
from paddle.trainer_config_helpers import *
settings(learning_rate=1e-4, batch_size=1000)
din = data_layer(name='input', size=100)
acts = [
TanhActivation, SigmoidActivation, SoftmaxActivation, IdentityActivation,
LinearActivation, ExpActivation, ReluActivation, BReluActivation,
SoftReluActivation, STanhActivation, AbsActivation, SquareActivation
]
outputs([
fc_layer(
input=din, size=100, act=act(), name="layer_%d" % i)
for i, act in enumerate(acts)
])
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-5)
x = data_layer(name='data', size=100)
x = layer_math.exp(x)
x = layer_math.sqrt(x)
x = layer_math.reciprocal(x)
x = layer_math.log(x)
x = layer_math.abs(x)
x = layer_math.sigmoid(x)
x = layer_math.tanh(x)
x = layer_math.square(x)
x = layer_math.relu(x)
y = 1 + x
y = y + 1
y = x + y
y = y - x
y = y - 2
y = 2 - y
y = 2 * y
y = y * 3
z = data_layer(name='data_2', size=1)
y = y * z
y = z * y
y = y + z
y = z + y
outputs(y)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
Test mixed layer, projections and operators.
'''
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-4)
din = data_layer(name='test', size=100)
din = embedding_layer(input=din, size=256)
with mixed_layer(size=100) as m1:
m1 += full_matrix_projection(input=din)
with mixed_layer(size=100) as m2:
m2 += table_projection(input=m1)
with mixed_layer(size=100) as m3:
m3 += identity_projection(input=m2)
with mixed_layer(size=100) as m4:
m4 += dotmul_projection(input=m3)
with mixed_layer() as m5:
m5 += context_projection(input=m4, context_len=3)
with mixed_layer() as m6:
m6 += dotmul_operator(a=m3, b=m4)
m6 += scaling_projection(m3)
img = data_layer(name='img', size=32 * 32)
flt = data_layer(name='filter', size=3 * 3 * 1 * 64)
with mixed_layer() as m7:
m7 += conv_operator(
img=img, filter=flt, num_filters=64, num_channels=1, filter_size=3)
m7 += conv_projection(img, filter_size=3, num_filters=64, num_channels=1)
with mixed_layer() as m8:
m8 += conv_operator(
img=img,
filter=flt,
num_filters=64,
num_channels=1,
filter_size=3,
stride=2,
padding=1,
trans=True)
m8 += conv_projection(
img,
filter_size=3,
num_filters=64,
num_channels=1,
stride=2,
padding=1,
trans=True)
end = mixed_layer(
input=[
full_matrix_projection(input=m5),
trans_full_matrix_projection(input=m6),
full_matrix_projection(input=m7), full_matrix_projection(input=m8)
],
size=100,
layer_attr=ExtraAttr(
drop_rate=0.5, error_clipping_threshold=40))
outputs(end)
type: "nn"
layers {
name: "image"
type: "data"
size: 65536
active_type: ""
}
layers {
name: "__conv_0__"
type: "exconv"
size: 3297856
active_type: ""
inputs {
input_layer_name: "image"
input_parameter_name: "___conv_0__.w0"
conv_conf {
filter_size: 32
channels: 1
stride: 1
padding: 1
groups: 1
filter_channels: 1
output_x: 227
img_size: 256
caffe_mode: true
filter_size_y: 32
padding_y: 1
stride_y: 1
output_y: 227
img_size_y: 256
dilation: 1
dilation_y: 1
}
}
bias_parameter_name: "___conv_0__.wbias"
num_filters: 64
shared_biases: true
height: 227
width: 227
}
layers {
name: "__batch_norm_0__"
type: "batch_norm"
size: 3297856
active_type: "relu"
inputs {
input_layer_name: "__conv_0__"
input_parameter_name: "___batch_norm_0__.w0"
image_conf {
channels: 64
img_size: 227
img_size_y: 227
}
}
inputs {
input_layer_name: "__conv_0__"
input_parameter_name: "___batch_norm_0__.w1"
}
inputs {
input_layer_name: "__conv_0__"
input_parameter_name: "___batch_norm_0__.w2"
}
bias_parameter_name: "___batch_norm_0__.wbias"
moving_average_fraction: 0.9
height: 227
width: 227
depth: 1
epsilon: 1e-05
}
layers {
name: "__crmnorm_0__"
type: "norm"
size: 3297856
active_type: ""
inputs {
input_layer_name: "__batch_norm_0__"
norm_conf {
norm_type: "cmrnorm-projection"
channels: 64
size: 32
scale: 0.0004
pow: 0.75
output_x: 227
img_size: 227
blocked: false
output_y: 227
img_size_y: 227
}
}
height: 227
width: 227
}
layers {
name: "__pool_0__"
type: "pool"
size: 2458624
active_type: ""
inputs {
input_layer_name: "__conv_0__"
pool_conf {
pool_type: "max-projection"
channels: 64
size_x: 32
stride: 1
output_x: 196
img_size: 227
padding: 0
size_y: 32
stride_y: 1
output_y: 196
img_size_y: 227
padding_y: 0
}
}
height: 196
width: 196
}
parameters {
name: "___conv_0__.w0"
size: 65536
initial_mean: 0.0
initial_std: 0.0441941738242
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___conv_0__.wbias"
size: 64
initial_mean: 0.0
initial_std: 0.0
dims: 64
dims: 1
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___batch_norm_0__.w0"
size: 64
initial_mean: 1.0
initial_std: 0.0
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___batch_norm_0__.w1"
size: 64
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 64
initial_strategy: 0
initial_smart: false
is_static: true
is_shared: true
}
parameters {
name: "___batch_norm_0__.w2"
size: 64
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 64
initial_strategy: 0
initial_smart: false
is_static: true
is_shared: true
}
parameters {
name: "___batch_norm_0__.wbias"
size: 64
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 64
initial_strategy: 0
initial_smart: false
}
input_layer_names: "image"
output_layer_names: "__pool_0__"
output_layer_names: "__crmnorm_0__"
sub_models {
name: "root"
layer_names: "image"
layer_names: "__conv_0__"
layer_names: "__batch_norm_0__"
layer_names: "__crmnorm_0__"
layer_names: "__pool_0__"
input_layer_names: "image"
output_layer_names: "__pool_0__"
output_layer_names: "__crmnorm_0__"
is_recurrent_layer_group: false
}
type: "nn"
layers {
name: "data"
type: "data"
size: 30
active_type: ""
}
layers {
name: "__first_seq_0__"
type: "seqlastins"
size: 30
active_type: ""
inputs {
input_layer_name: "data"
}
select_first: true
trans_type: "seq"
seq_pool_stride: -1
}
layers {
name: "__first_seq_1__"
type: "seqlastins"
size: 30
active_type: ""
inputs {
input_layer_name: "data"
}
select_first: true
trans_type: "non-seq"
seq_pool_stride: -1
}
layers {
name: "__last_seq_0__"
type: "seqlastins"
size: 30
active_type: ""
inputs {
input_layer_name: "data"
}
trans_type: "seq"
seq_pool_stride: -1
}
layers {
name: "__last_seq_1__"
type: "seqlastins"
size: 30
active_type: ""
inputs {
input_layer_name: "data"
}
trans_type: "non-seq"
seq_pool_stride: -1
}
layers {
name: "__first_seq_2__"
type: "seqlastins"
size: 30
active_type: ""
inputs {
input_layer_name: "data"
}
select_first: true
trans_type: "non-seq"
seq_pool_stride: 5
}
layers {
name: "__last_seq_2__"
type: "seqlastins"
size: 30
active_type: ""
inputs {
input_layer_name: "data"
}
trans_type: "non-seq"
seq_pool_stride: 5
}
input_layer_names: "data"
output_layer_names: "__first_seq_0__"
output_layer_names: "__first_seq_1__"
output_layer_names: "__last_seq_0__"
output_layer_names: "__last_seq_1__"
output_layer_names: "__first_seq_2__"
output_layer_names: "__last_seq_2__"
sub_models {
name: "root"
layer_names: "data"
layer_names: "__first_seq_0__"
layer_names: "__first_seq_1__"
layer_names: "__last_seq_0__"
layer_names: "__last_seq_1__"
layer_names: "__first_seq_2__"
layer_names: "__last_seq_2__"
input_layer_names: "data"
output_layer_names: "__first_seq_0__"
output_layer_names: "__first_seq_1__"
output_layer_names: "__last_seq_0__"
output_layer_names: "__last_seq_1__"
output_layer_names: "__first_seq_2__"
output_layer_names: "__last_seq_2__"
is_recurrent_layer_group: false
}
type: "nn"
layers {
name: "input"
type: "data"
size: 300
active_type: ""
}
layers {
name: "__clip_0__"
type: "clip"
size: 300
active_type: ""
inputs {
input_layer_name: "input"
clip_conf {
min: -10
max: 10
}
}
}
input_layer_names: "input"
output_layer_names: "__clip_0__"
sub_models {
name: "root"
layer_names: "input"
layer_names: "__clip_0__"
input_layer_names: "input"
output_layer_names: "__clip_0__"
is_recurrent_layer_group: false
}
type: "nn"
layers {
name: "vector1"
type: "data"
size: 10
active_type: ""
}
layers {
name: "vector2"
type: "data"
size: 10
active_type: ""
}
layers {
name: "__dot_prod_layer_0__"
type: "dot_prod"
size: 1
active_type: ""
inputs {
input_layer_name: "vector1"
}
inputs {
input_layer_name: "vector2"
}
}
input_layer_names: "vector1"
input_layer_names: "vector2"
output_layer_names: "__dot_prod_layer_0__"
sub_models {
name: "root"
layer_names: "vector1"
layer_names: "vector2"
layer_names: "__dot_prod_layer_0__"
input_layer_names: "vector1"
input_layer_names: "vector2"
output_layer_names: "__dot_prod_layer_0__"
is_recurrent_layer_group: false
}
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-4)
din = data_layer(name='data', size=120)
outputs(bidirectional_gru(input=din, size=40, return_seq=True))