Commit 58e1b3b3 authored by Yu Yang, committed by GitHub

Merge pull request #446 from QiJune/format_py_code_2nd

format python code in python directory
@@ -11,4 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
@@ -18,8 +18,7 @@ import collections
 import functools
 import itertools

-logging.basicConfig(
-    format="[%(levelname)s %(asctime)s %(filename)s:%(lineno)s]"
-    " %(message)s")
+logging.basicConfig(format="[%(levelname)s %(asctime)s %(filename)s:%(lineno)s]"
+                    " %(message)s")
@@ -132,8 +131,10 @@ class InputOrderWrapper(object):
     def __call__(self, obj, filename):
         for item in self.generator(obj, filename):
             if isinstance(item, dict):
-                yield [item.get(input_name, None) for input_name in
-                       self.input_order]
+                yield [
+                    item.get(input_name, None)
+                    for input_name in self.input_order
+                ]
             else:
                 yield item
@@ -162,8 +163,8 @@ class CheckWrapper(object):
                 yield items
             except AssertionError as e:
                 self.logger.warning(
-                    "Item (%s) is not fit the input type with error %s"
-                    % (repr(item), repr(e)))
+                    "Item (%s) is not fit the input type with error %s" %
+                    (repr(item), repr(e)))
                 if self.check_fail_continue:
                     continue
@@ -202,13 +203,17 @@ class CheckWrapper(object):
             callback(each)


-def provider(input_types=None, should_shuffle=None, pool_size=-1,
+def provider(input_types=None,
+             should_shuffle=None,
+             pool_size=-1,
              min_pool_size=-1,
              can_over_batch_size=True,
              calc_batch_size=None,
              cache=CacheType.NO_CACHE,
-             check=False, check_fail_continue=False,
-             init_hook=None, **kwargs):
+             check=False,
+             check_fail_continue=False,
+             init_hook=None,
+             **kwargs):
     """
     Provider decorator. Use it to make a function into PyDataProvider2 object.
     In this function, user only need to get each sample for some train/test
@@ -318,8 +323,8 @@ def provider(input_types=None, should_shuffle=None, pool_size=-1,
                     "Could not recognize should_shuffle (%s), "
                     "just use default value of should_shuffle."
                     " Please set should_shuffle to bool value or "
-                    "something in %s" % (
-                        repr(self.should_shuffle),
-                        repr(true_table + false_table)))
+                    "something in %s" %
+                    (repr(self.should_shuffle),
+                     repr(true_table + false_table)))
                 self.should_shuffle = None
@@ -351,8 +356,7 @@ def provider(input_types=None, should_shuffle=None, pool_size=-1,
             self.generator = InputOrderWrapper(self.generator,
                                                self.input_order)
             if self.check:
-                self.generator = CheckWrapper(self.generator,
-                                              self.slots,
+                self.generator = CheckWrapper(self.generator, self.slots,
                                               check_fail_continue,
                                               self.logger)
@@ -368,4 +372,3 @@ def deserialize_args(args):
     :return:
     """
     return cPickle.loads(args)
-
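For context, a minimal sketch of how the reformatted `provider` decorator above is typically used in a data provider script; the input types, file format, and parsing below are illustrative assumptions, not part of this commit:

from paddle.trainer.PyDataProvider2 import provider, dense_vector, integer_value


@provider(
    input_types=[dense_vector(784), integer_value(10)],
    should_shuffle=True,
    check=True,
    check_fail_continue=True)  # CheckWrapper logs bad samples, then skips them
def process(settings, filename):
    # Assumed format: each line holds 784 comma-separated floats plus a label.
    with open(filename, 'r') as f:
        for line in f:
            fields = line.split(',')
            yield [float(x) for x in fields[:-1]], int(fields[-1])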
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """
 This module provide a wrapper(decorator) to wrap a data process method into a
 PyDataProvider. Some examples are shown `here <data_provider/python_case.html>`_.
@@ -47,6 +46,7 @@ except ImportError:
     import io

+
 class SlotType(object):  # Just a hint for user.
     pass
@@ -83,6 +83,7 @@ class SparseNonValueSlot(SlotType):
     - **SubSeq**: [[[int, int, ...], [int, ....], ...] , \
                   [[int, int, ...], [int, ....], ...] , ...]
     """
+
     def __init__(self, dim):
         """
         :param dim: slot dimension
@@ -294,8 +295,9 @@ class GeneralPyDataProvider:
                 fn = "%s_%d" % (self.profile_filename, self.profile_count)
                 sortby = "cumulative"
                 with open(fn, "w") as f:
-                    pstats.Stats(self.profiler, stream=f).sort_stats(
-                        sortby).print_stats()
+                    pstats.Stats(
+                        self.profiler,
+                        stream=f).sort_stats(sortby).print_stats()
                 self.logger.info("saving profile to file %s" % fn)
                 self.profile_count += 1
             self.logger.info("resetting profile")
@@ -453,9 +455,10 @@ class GeneralPyDataProvider:
             seq_stream.flush()
             subseq_stream.flush()

-            return "".join([self.int_packer.pack(current_batch_size),
-                            data_bytes.getvalue(),
-                            seq_bytes.getvalue(), subseq_bytes.getvalue()])
+            return "".join([
+                self.int_packer.pack(current_batch_size), data_bytes.getvalue(),
+                seq_bytes.getvalue(), subseq_bytes.getvalue()
+            ])
         finally:
             data_stream.close()
@@ -516,7 +519,7 @@ class GeneralPyDataProvider:
                                     self.data_pool[idx])
                 idx -= 1

-            ret_list += self.data_pool[self.data_pool_idx: idx + 1]
+            ret_list += self.data_pool[self.data_pool_idx:idx + 1]

             # for speed reason, just shift left index, not delete data actually.
             self.data_pool_idx = idx + 1
@@ -537,8 +540,8 @@ class GeneralPyDataProvider:
             if self.max_pool_size == 0:
                 for i in xrange(min(self.file_count, len(self.generators))):
                     self.data_pool += list(self.generators[i])
-                self.generators = self.generators[
-                    min(self.file_count, len(self.generators)):]
+                self.generators = self.generators[min(self.file_count,
+                                                      len(self.generators)):]
                 self.max_pool_size = len(self.data_pool)
             else:
                 while len(self.data_pool) < self.max_pool_size and len(
@@ -562,9 +565,15 @@ def default_init_hook(cls, *args, **kwargs):
     del cls, args, kwargs


-def provider(slots=None, use_seq=False, should_shuffle=True, pool_size=1,
-             can_over_batch_size=True, calc_batch_size=lambda data: 1,
-             debug=False, init_hook=default_init_hook, profile_filename=None):
+def provider(slots=None,
+             use_seq=False,
+             should_shuffle=True,
+             pool_size=1,
+             can_over_batch_size=True,
+             calc_batch_size=lambda data: 1,
+             debug=False,
+             init_hook=default_init_hook,
+             profile_filename=None):
     """
     The decorator for PyDataProvider. User should use this to create Provider class.
     User should only concern how to read sample from file.
...
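A hypothetical sketch of the legacy decorator above; the slot classes named here (DenseSlot, IndexSlot) are assumed siblings of the SparseNonValueSlot shown in this diff, and the file format is invented for illustration:

from paddle.trainer.PyDataProvider import provider, DenseSlot, IndexSlot


@provider(slots=[DenseSlot(784), IndexSlot(10)], should_shuffle=True)
def process(obj, file_name):
    # Assumed format: 784 comma-separated floats followed by an integer label.
    with open(file_name, 'r') as f:
        for line in f:
            fields = line.split(',')
            yield [float(x) for x in fields[:-1]], int(fields[-1])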
@@ -11,4 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
@@ -17,8 +17,7 @@ from paddle.proto.DataConfig_pb2 import DataConfig
 g_config = None


-def SimpleData(
-        files=None,
+def SimpleData(files=None,
                feat_dim=None,
                context_len=None,
                buffer_capacity=None):
@@ -33,6 +32,7 @@ def SimpleData(files=None,
     data_config.buffer_capacity = buffer_capacity
     return data_config

+
 def get_config_funcs(trainer_config):
     global g_config
     g_config = trainer_config
...
@@ -12,13 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-__all__ = ["TanhActivation", "SigmoidActivation",
-           "SoftmaxActivation", "IdentityActivation", "LinearActivation",
-           'SequenceSoftmaxActivation', 'ExpActivation',
-           "ReluActivation", "BReluActivation", "SoftReluActivation",
-           "STanhActivation",
-           "AbsActivation", "SquareActivation",
-           "BaseActivation"]
+__all__ = [
+    "TanhActivation", "SigmoidActivation", "SoftmaxActivation",
+    "IdentityActivation", "LinearActivation", 'SequenceSoftmaxActivation',
+    'ExpActivation', "ReluActivation", "BReluActivation", "SoftReluActivation",
+    "STanhActivation", "AbsActivation", "SquareActivation", "BaseActivation"
+]


 class BaseActivation(object):
@@ -51,7 +50,8 @@ class TanhActivation(BaseActivation):
     f(z)=tanh(z)=\\frac{e^z-e^{-z}}{e^z+e^{-z}}
     """

-    def __init__(self): BaseActivation.__init__(self, 'tanh', True)
+    def __init__(self):
+        BaseActivation.__init__(self, 'tanh', True)


 class SigmoidActivation(BaseActivation):
@@ -63,7 +63,8 @@ class SigmoidActivation(BaseActivation):
     f(z) = \\frac{1}{1+exp(-z)}
     """

-    def __init__(self): BaseActivation.__init__(self, 'sigmoid', True)
+    def __init__(self):
+        BaseActivation.__init__(self, 'sigmoid', True)


 class SoftmaxActivation(BaseActivation):
@@ -104,7 +105,8 @@ class IdentityActivation(BaseActivation):
     Just do nothing for output both forward/backward.
     """

-    def __init__(self): BaseActivation.__init__(self, '', False)
+    def __init__(self):
+        BaseActivation.__init__(self, '', False)


 LinearActivation = IdentityActivation
@@ -124,7 +126,8 @@ class ReluActivation(BaseActivation):
     0     &\\quad\\mathrm{otherwize}
     """

-    def __init__(self): BaseActivation.__init__(self, 'relu', True)
+    def __init__(self):
+        BaseActivation.__init__(self, 'relu', True)


 class BReluActivation(BaseActivation):
@@ -141,7 +144,8 @@ class BReluActivation(BaseActivation):
     0     &\\quad \\mathrm{otherwise}
     """

-    def __init__(self): BaseActivation.__init__(self, 'brelu', False)
+    def __init__(self):
+        BaseActivation.__init__(self, 'brelu', False)


 class SoftReluActivation(BaseActivation):
@@ -149,7 +153,9 @@ class SoftReluActivation(BaseActivation):
     SoftRelu Activation.
     """

-    def __init__(self): BaseActivation.__init__(self, 'softrelu', False)
+    def __init__(self):
+        BaseActivation.__init__(self, 'softrelu', False)
+

 class STanhActivation(BaseActivation):
     """
@@ -160,7 +166,8 @@ class STanhActivation(BaseActivation):
     f(z) = 1.7159 * tanh(2/3*z)
     """

-    def __init__(self): BaseActivation.__init__(self, 'stanh', False)
+    def __init__(self):
+        BaseActivation.__init__(self, 'stanh', False)


 class AbsActivation(BaseActivation):
@@ -178,7 +185,8 @@ class AbsActivation(BaseActivation):
     0     &\\quad if \\quad z = 0
     """

-    def __init__(self): BaseActivation.__init__(self, 'abs', False)
+    def __init__(self):
+        BaseActivation.__init__(self, 'abs', False)


 class SquareActivation(BaseActivation):
@@ -189,7 +197,9 @@ class SquareActivation(BaseActivation):
     f(z) = z^2.
     """

-    def __init__(self): BaseActivation.__init__(self, 'square', False)
+    def __init__(self):
+        BaseActivation.__init__(self, 'square', False)
+

 class ExpActivation(BaseActivation):
     """
@@ -198,7 +208,10 @@ class ExpActivation(BaseActivation):
     .. math::
     f(z) = e^z.
     """
-    def __init__(self): BaseActivation.__init__(self, 'exponential', False)
+
+    def __init__(self):
+        BaseActivation.__init__(self, 'exponential', False)
+

 class LogActivation(BaseActivation):
     """
@@ -207,4 +220,6 @@ class LogActivation(BaseActivation):
     .. math::
     f(z) = log(z)
     """
-    def __init__(self): BaseActivation.__init__(self, 'log', False)
+
+    def __init__(self):
+        BaseActivation.__init__(self, 'log', False)
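Each class above only tags a layer with an activation name; instances are passed to layers through their `act` argument, exactly as the test configs later in this diff do. A minimal sketch:

from paddle.trainer_config_helpers import *

din = data_layer(name='input', size=100)
hidden = fc_layer(input=din, size=200, act=TanhActivation())
out = fc_layer(input=hidden, size=10, act=SoftmaxActivation())
outputs(out)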
@@ -13,8 +13,9 @@
 # limitations under the License.

 from paddle.trainer.config_parser import *

-__all__ = ['ParamAttr', 'ExtraAttr', 'ParameterAttribute',
-           'ExtraLayerAttribute']
+__all__ = [
+    'ParamAttr', 'ExtraAttr', 'ParameterAttribute', 'ExtraLayerAttribute'
+]


 def convert_and_compare(x, Type):
@@ -25,7 +26,8 @@ def convert_and_compare(x, Type):
     :param Type: target type to check x over
     """
-    return type(x)(Type(x))==x
+    return type(x)(Type(x)) == x
+

 def is_compatible_with(x, Type):
     """
@@ -91,9 +93,17 @@ class ParameterAttribute(object):
     :type sparse_update: bool
     """

-    def __init__(self, name=None, is_static=False, initial_std=None,
-                 initial_mean=None, initial_max=None, initial_min=None,
-                 l1_rate=None, l2_rate=None, learning_rate=None, momentum=None,
+    def __init__(self,
+                 name=None,
+                 is_static=False,
+                 initial_std=None,
+                 initial_mean=None,
+                 initial_max=None,
+                 initial_min=None,
+                 l1_rate=None,
+                 l2_rate=None,
+                 learning_rate=None,
+                 momentum=None,
                  sparse_update=False):
         # initialize strategy.
         if is_static:
@@ -183,7 +193,10 @@ class ExtraLayerAttribute(object):
     :type device: int
     """

-    def __init__(self, error_clipping_threshold=None, drop_rate=None, device=None):
+    def __init__(self,
+                 error_clipping_threshold=None,
+                 drop_rate=None,
+                 device=None):
         self.attr = dict()
         if isinstance(error_clipping_threshold, float):
             assert error_clipping_threshold > 0
@@ -200,8 +213,8 @@ class ExtraLayerAttribute(object):
         for key in self.attr:
             if not hasattr(self, 'can_%s' % key) or \
                     not getattr(self, 'can_%s' % key):
-                raise NotImplementedError(
-                    "Layer %s cannot support %s" % (layer_name, key))
+                raise NotImplementedError("Layer %s cannot support %s" %
+                                          (layer_name, key))

     @staticmethod
     def to_kwargs(attr):
...
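A short usage sketch for the two attribute classes touched above, using only constructor arguments visible in this diff; the surrounding layers are illustrative:

from paddle.trainer_config_helpers import *

# A named (shareable) parameter with explicit initialization and L2 decay.
w_attr = ParamAttr(name='w', initial_std=0.01, l2_rate=1e-4)
# Per-layer extras: dropout plus error clipping.
extra = ExtraAttr(drop_rate=0.5, error_clipping_threshold=40)

din = data_layer(name='input', size=100)
h = fc_layer(input=din, size=100, param_attr=w_attr, layer_attr=extra)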
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """
 Data Sources are helpers to define paddle training data or testing data.
 """
@@ -26,8 +25,12 @@ except ImportError:

 __all__ = ['define_py_data_sources2']


-def define_py_data_source(file_list, cls, module,
-                          obj, args=None, async=False,
+def define_py_data_source(file_list,
+                          cls,
+                          module,
+                          obj,
+                          args=None,
+                          async=False,
                           data_cls=PyData):
     """
     Define a python data source.
@@ -76,6 +79,7 @@ def define_py_data_source(file_list,
         args = pickle.dumps(args, 0)

     if data_cls is None:
+
         def py_data2(files, load_data_module, load_data_object, load_data_args,
                      **kwargs):
             data = DataBase()
@@ -86,17 +90,25 @@ def define_py_data_source(file_list,
             data.load_data_args = load_data_args
             data.async_load_data = True
             return data
+
         data_cls = py_data2

-    cls(data_cls(files=file_list,
-                 load_data_module=module,
-                 load_data_object=obj,
-                 load_data_args=args,
-                 async_load_data=async))
+    cls(
+        data_cls(
+            files=file_list,
+            load_data_module=module,
+            load_data_object=obj,
+            load_data_args=args,
+            async_load_data=async))
-def define_py_data_sources(train_list, test_list, module, obj, args=None,
-                           train_async=False, data_cls=PyData):
+def define_py_data_sources(train_list,
+                           test_list,
+                           module,
+                           obj,
+                           args=None,
+                           train_async=False,
+                           data_cls=PyData):
     """
     The annotation is almost the same as define_py_data_sources2, except that
     it can specific train_async and data_cls.
@@ -125,8 +137,8 @@ def define_py_data_sources(train_list, test_list, module, obj, args=None,
     """

     def __is_splitable__(o):
-        return (isinstance(o, list) or isinstance(o, tuple)
-                ) and hasattr(o, '__len__') and len(o) == 2
+        return (isinstance(o, list) or
+                isinstance(o, tuple)) and hasattr(o, '__len__') and len(o) == 2

     assert train_list is not None or test_list is not None
     assert module is not None and obj is not None
@@ -196,7 +208,8 @@ def define_py_data_sources2(train_list, test_list, module, obj, args=None):
     :return: None
     :rtype: None
     """
-    define_py_data_sources(train_list=train_list,
-                           test_list=test_list,
-                           module=module,
-                           obj=obj,
+    define_py_data_sources(
+        train_list=train_list,
+        test_list=test_list,
+        module=module,
+        obj=obj,
...
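For reference, a typical call to define_py_data_sources2 from a trainer config; the list files, module, and args are placeholders:

from paddle.trainer_config_helpers import *

define_py_data_sources2(
    train_list='train.list',
    test_list='test.list',
    module='my_data_provider',  # assumed module exposing an @provider function
    obj='process',
    args={'dictionary': 'dict.txt'})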
@@ -18,16 +18,18 @@ from .attrs import ParamAttr
 from .activations import TanhActivation
 from paddle.trainer.config_parser import *

-__all__ = ['wrap_name_default', 'wrap_param_attr_default',
-           'wrap_bias_attr_default', 'wrap_act_default',
-           'wrap_param_default']
+__all__ = [
+    'wrap_name_default', 'wrap_param_attr_default', 'wrap_bias_attr_default',
+    'wrap_act_default', 'wrap_param_default'
+]


 def __default_not_set_callback__(kwargs, name):
     return name not in kwargs or kwargs[name] is None


-def wrap_param_default(param_names=None, default_factory=None,
+def wrap_param_default(param_names=None,
+                       default_factory=None,
                        not_set_callback=__default_not_set_callback__):
     assert param_names is not None
     assert isinstance(param_names, list) or isinstance(param_names, tuple)
@@ -43,7 +45,8 @@ def wrap_param_default(param_names=None, default_factory=None,
             if argspec.defaults:
                 num_positional -= len(argspec.defaults)
             if not argspec.varargs and len(args) > num_positional:
-                logger.fatal("Must use keyword arguments for non-positional args")
+                logger.fatal(
+                    "Must use keyword arguments for non-positional args")
             for name in param_names:
                 if not_set_callback(kwargs, name):  # Not set
                     kwargs[name] = default_factory(func)
@@ -112,13 +115,13 @@ def wrap_param_attr_default(param_names=None, default_factory=None):
     return wrap_param_default(param_names, default_factory)


-def wrap_bias_attr_default(param_names=None, default_factory=None,
+def wrap_bias_attr_default(param_names=None,
+                           default_factory=None,
                            has_bias=True):
     if param_names is None:
         param_names = ['bias_attr']
     if default_factory is None:
-        default_factory = lambda _: ParamAttr(initial_std=0.,
-                                              initial_mean=0.)
+        default_factory = lambda _: ParamAttr(initial_std=0., initial_mean=0.)

     def __bias_attr_not_set__(kwargs, name):
         if has_bias:
...
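What these decorators buy callers, as a sketch; `my_layer` is hypothetical, and the default factory mirrors the one defined above:

@wrap_param_default(
    ['bias_attr'],
    default_factory=lambda _: ParamAttr(initial_std=0., initial_mean=0.))
def my_layer(input, bias_attr=None):
    # bias_attr is now always set: either caller-provided or the factory default.
    return input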
@@ -15,13 +15,14 @@
 from paddle.trainer.config_parser import *
 from default_decorators import *

-__all__ = ["evaluator_base","classification_error_evaluator", "auc_evaluator",
-           "pnpair_evaluator", "precision_recall_evaluator",
-           "ctc_error_evaluator", "chunk_evaluator", "sum_evaluator",
-           "column_sum_evaluator", "value_printer_evaluator",
-           "gradient_printer_evaluator", "maxid_printer_evaluator",
-           "maxframe_printer_evaluator", "seqtext_printer_evaluator",
-           "classification_error_printer_evaluator"]
+__all__ = [
+    "evaluator_base", "classification_error_evaluator", "auc_evaluator",
+    "pnpair_evaluator", "precision_recall_evaluator", "ctc_error_evaluator",
+    "chunk_evaluator", "sum_evaluator", "column_sum_evaluator",
+    "value_printer_evaluator", "gradient_printer_evaluator",
+    "maxid_printer_evaluator", "maxframe_printer_evaluator",
+    "seqtext_printer_evaluator", "classification_error_printer_evaluator"
+]


 class EvaluatorAttribute(object):
@@ -32,10 +33,7 @@ class EvaluatorAttribute(object):
     FOR_UTILS = 1 << 4

     KEYS = [
-        "for_classification",
-        "for_regression",
-        "for_rank",
-        "for_print",
+        "for_classification", "for_regression", "for_rank", "for_print",
         "for_utils"
     ]
@@ -55,10 +53,11 @@ def evaluator(*attrs):
             setattr(method, EvaluatorAttribute.to_key(attr), True)
         method.is_evaluator = True
         return method
+
     return impl


-def evaluator_base(
-        input,
+def evaluator_base(input,
                    type,
                    label=None,
                    weight=None,
@@ -130,10 +129,10 @@ def evaluator_base(
         result_file=result_file,
         delimited=delimited)


 @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
 @wrap_name_default()
-def classification_error_evaluator(
-    input,
+def classification_error_evaluator(input,
                                    label,
                                    name=None,
                                    weight=None,
@@ -170,13 +169,14 @@ def classification_error_evaluator(
     :return: None.
     """

-    evaluator_base(name=name,
-                   type="classification_error",
-                   input=input,
-                   label=label,
-                   weight=weight,
-                   classification_threshold=threshold,
-                   )
+    evaluator_base(
+        name=name,
+        type="classification_error",
+        input=input,
+        label=label,
+        weight=weight,
+        classification_threshold=threshold, )


 @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
 @wrap_name_default()
@@ -184,8 +184,7 @@ def auc_evaluator(
         input,
         label,
         name=None,
-        weight=None,
-        ):
+        weight=None, ):
""" """
Auc Evaluator which adapts to binary classification. Auc Evaluator which adapts to binary classification.
...@@ -205,12 +204,14 @@ def auc_evaluator( ...@@ -205,12 +204,14 @@ def auc_evaluator(
[sample_num, 1]. [sample_num, 1].
:type weight: LayerOutput :type weight: LayerOutput
""" """
evaluator_base(name=name, evaluator_base(
name=name,
type="last-column-auc", type="last-column-auc",
input=input, input=input,
label=label, label=label,
weight=weight) weight=weight)
@evaluator(EvaluatorAttribute.FOR_RANK) @evaluator(EvaluatorAttribute.FOR_RANK)
@wrap_name_default() @wrap_name_default()
def pnpair_evaluator( def pnpair_evaluator(
@@ -218,8 +219,7 @@ def pnpair_evaluator(
         label,
         info,
         name=None,
-        weight=None,
-        ):
+        weight=None, ):
""" """
Positive-negative pair rate Evaluator which adapts to rank task like Positive-negative pair rate Evaluator which adapts to rank task like
learning to rank. This evaluator must contain at least three layers. learning to rank. This evaluator must contain at least three layers.
...@@ -242,13 +242,15 @@ def pnpair_evaluator( ...@@ -242,13 +242,15 @@ def pnpair_evaluator(
[sample_num, 1]. (TODO, explaination) [sample_num, 1]. (TODO, explaination)
:type weight: LayerOutput :type weight: LayerOutput
""" """
evaluator_base(name=name, evaluator_base(
name=name,
type="pnpair", type="pnpair",
input=input, input=input,
label=label, label=label,
info=info, info=info,
weight=weight) weight=weight)
@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
@wrap_name_default() @wrap_name_default()
def precision_recall_evaluator( def precision_recall_evaluator(
@@ -256,8 +258,7 @@ def precision_recall_evaluator(
         label,
         positive_label=None,
         weight=None,
-        name=None,
-        ):
+        name=None, ):
""" """
An Evaluator to calculate precision and recall, F1-score. An Evaluator to calculate precision and recall, F1-score.
It is adapt to the task with multiple labels. It is adapt to the task with multiple labels.
...@@ -286,20 +287,21 @@ def precision_recall_evaluator( ...@@ -286,20 +287,21 @@ def precision_recall_evaluator(
[sample_num, 1]. (TODO, explaination) [sample_num, 1]. (TODO, explaination)
:type weight: LayerOutput :type weight: LayerOutput
""" """
evaluator_base(name=name, evaluator_base(
name=name,
type="precision_recall", type="precision_recall",
input=input, input=input,
label=label, label=label,
positive_label=positive_label, positive_label=positive_label,
weight=weight) weight=weight)
@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
@wrap_name_default() @wrap_name_default()
def ctc_error_evaluator( def ctc_error_evaluator(
input, input,
label, label,
name=None, name=None, ):
):
""" """
This evaluator is to calculate sequence-to-sequence edit distance. This evaluator is to calculate sequence-to-sequence edit distance.
...@@ -317,10 +319,9 @@ def ctc_error_evaluator( ...@@ -317,10 +319,9 @@ def ctc_error_evaluator(
label for ctc_layer label for ctc_layer
:type label: LayerOutput :type label: LayerOutput
""" """
evaluator_base(name=name, evaluator_base(
type="ctc_edit_distance", name=name, type="ctc_edit_distance", input=input, label=label)
input=input,
label=label)
@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
@wrap_name_default() @wrap_name_default()
@@ -328,8 +329,7 @@ def chunk_evaluator(
         input,
         name=None,
         chunk_scheme=None,
-        num_chunk_types=None,
-        ):
+        num_chunk_types=None, ):
     """
     Chunk evaluator is used to evaluate segment labelling accuracy for a
     sequence. It calculates the chunk detection F1 score.
@@ -375,19 +375,20 @@ def chunk_evaluator(
     :type chunk_scheme: basestring
     :param num_chunk_types: number of chunk types other than "other"
     """
-    evaluator_base(name=name,
-                   type="chunk",
-                   input=input,
-                   chunk_scheme=chunk_scheme,
-                   num_chunk_types=num_chunk_types)
+    evaluator_base(
+        name=name,
+        type="chunk",
+        input=input,
+        chunk_scheme=chunk_scheme,
+        num_chunk_types=num_chunk_types)
+

 @evaluator(EvaluatorAttribute.FOR_UTILS)
 @wrap_name_default()
 def sum_evaluator(
         input,
         name=None,
-        weight=None,
-        ):
+        weight=None, ):
""" """
An Evaluator to sum the result of input. An Evaluator to sum the result of input.
...@@ -405,18 +406,15 @@ def sum_evaluator( ...@@ -405,18 +406,15 @@ def sum_evaluator(
[sample_num, 1]. (TODO, explaination) [sample_num, 1]. (TODO, explaination)
:type weight: LayerOutput :type weight: LayerOutput
""" """
evaluator_base(name=name, evaluator_base(name=name, type="sum", input=input, weight=weight)
type="sum",
input=input,
weight=weight)
@evaluator(EvaluatorAttribute.FOR_UTILS) @evaluator(EvaluatorAttribute.FOR_UTILS)
@wrap_name_default() @wrap_name_default()
def column_sum_evaluator( def column_sum_evaluator(
input, input,
name=None, name=None,
weight=None, weight=None, ):
):
""" """
This Evaluator is used to sum the last column of input. This Evaluator is used to sum the last column of input.
...@@ -431,22 +429,22 @@ def column_sum_evaluator( ...@@ -431,22 +429,22 @@ def column_sum_evaluator(
:param input: Input Layer name. :param input: Input Layer name.
:type input: LayerOutput :type input: LayerOutput
""" """
evaluator_base(name=name, evaluator_base(
type="last-column-sum", name=name, type="last-column-sum", input=input, weight=weight)
input=input,
weight=weight)
""" """
The following are printer Evaluators which are usually used to The following are printer Evaluators which are usually used to
print the result, like value or gradient of input layers, the print the result, like value or gradient of input layers, the
results generated in machine translation, the classification error etc. results generated in machine translation, the classification error etc.
""" """
@evaluator(EvaluatorAttribute.FOR_PRINT) @evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default() @wrap_name_default()
def value_printer_evaluator( def value_printer_evaluator(
input, input,
name=None, name=None, ):
):
""" """
This Evaluator is used to print the values of input layers. It contains This Evaluator is used to print the values of input layers. It contains
one or more input layers. one or more input layers.
...@@ -462,16 +460,14 @@ def value_printer_evaluator( ...@@ -462,16 +460,14 @@ def value_printer_evaluator(
:param name: Evaluator name. :param name: Evaluator name.
:type name: None|basestring :type name: None|basestring
""" """
evaluator_base(name=name, evaluator_base(name=name, type="value_printer", input=input)
type="value_printer",
input=input)
@evaluator(EvaluatorAttribute.FOR_PRINT) @evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default() @wrap_name_default()
def gradient_printer_evaluator( def gradient_printer_evaluator(
input, input,
name=None, name=None, ):
):
""" """
This Evaluator is used to print the gradient of input layers. It contains This Evaluator is used to print the gradient of input layers. It contains
one or more input layers. one or more input layers.
...@@ -487,17 +483,15 @@ def gradient_printer_evaluator( ...@@ -487,17 +483,15 @@ def gradient_printer_evaluator(
:param name: Evaluator name. :param name: Evaluator name.
:type name: None|basestring :type name: None|basestring
""" """
evaluator_base(name=name, evaluator_base(name=name, type="gradient_printer", input=input)
type="gradient_printer",
input=input)
@evaluator(EvaluatorAttribute.FOR_PRINT) @evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default() @wrap_name_default()
def maxid_printer_evaluator( def maxid_printer_evaluator(
input, input,
num_results=None, num_results=None,
name=None, name=None, ):
):
""" """
This Evaluator is used to print maximum top k values and their indexes This Evaluator is used to print maximum top k values and their indexes
of each row of input layers. It contains one or more input layers. of each row of input layers. It contains one or more input layers.
...@@ -517,18 +511,16 @@ def maxid_printer_evaluator( ...@@ -517,18 +511,16 @@ def maxid_printer_evaluator(
:param name: Evaluator name. :param name: Evaluator name.
:type name: None|basestring :type name: None|basestring
""" """
evaluator_base(name=name, evaluator_base(
type="max_id_printer", name=name, type="max_id_printer", input=input, num_results=num_results)
input=input,
num_results=num_results)
@evaluator(EvaluatorAttribute.FOR_PRINT) @evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default() @wrap_name_default()
def maxframe_printer_evaluator( def maxframe_printer_evaluator(
input, input,
num_results=None, num_results=None,
name=None, name=None, ):
):
""" """
This Evaluator is used to print the top k frames of each input layers. This Evaluator is used to print the top k frames of each input layers.
The input layers should contain sequences info or sequences type. The input layers should contain sequences info or sequences type.
...@@ -549,11 +541,13 @@ def maxframe_printer_evaluator( ...@@ -549,11 +541,13 @@ def maxframe_printer_evaluator(
:param name: Evaluator name. :param name: Evaluator name.
:type name: None|basestring :type name: None|basestring
""" """
evaluator_base(name=name, evaluator_base(
name=name,
type="max_frame_printer", type="max_frame_printer",
input=input, input=input,
num_results=num_results) num_results=num_results)
@evaluator(EvaluatorAttribute.FOR_PRINT) @evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default() @wrap_name_default()
def seqtext_printer_evaluator( def seqtext_printer_evaluator(
...@@ -562,8 +556,7 @@ def seqtext_printer_evaluator( ...@@ -562,8 +556,7 @@ def seqtext_printer_evaluator(
id_input=None, id_input=None,
dict_file=None, dict_file=None,
delimited=None, delimited=None,
name=None, name=None, ):
):
""" """
Sequence text printer will print text according to index matrix and a Sequence text printer will print text according to index matrix and a
dictionary. There can be multiple input to this layer: dictionary. There can be multiple input to this layer:
...@@ -636,21 +629,22 @@ def seqtext_printer_evaluator( ...@@ -636,21 +629,22 @@ def seqtext_printer_evaluator(
inputs = [id_input, input] inputs = [id_input, input]
input.parents.append(id_input) input.parents.append(id_input)
evaluator_base(name=name, evaluator_base(
name=name,
type="seq_text_printer", type="seq_text_printer",
input=inputs, input=inputs,
dict_file=dict_file, dict_file=dict_file,
result_file=result_file, result_file=result_file,
delimited=delimited) delimited=delimited)
@evaluator(EvaluatorAttribute.FOR_PRINT) @evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default() @wrap_name_default()
def classification_error_printer_evaluator( def classification_error_printer_evaluator(
input, input,
label, label,
threshold=0.5, threshold=0.5,
name=None, name=None, ):
):
""" """
This Evaluator is used to print the classification error of each sample. This Evaluator is used to print the classification error of each sample.
...@@ -667,7 +661,8 @@ def classification_error_printer_evaluator( ...@@ -667,7 +661,8 @@ def classification_error_printer_evaluator(
:param name: Evaluator name. :param name: Evaluator name.
:type name: None|basestring :type name: None|basestring
""" """
evaluator_base(name=name, evaluator_base(
name=name,
type="classification_error_printer", type="classification_error_printer",
input=input, input=input,
label=label, label=label,
......
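For orientation, a sketch of how these evaluators are attached in a trainer config; the network itself is illustrative:

from paddle.trainer_config_helpers import *

data = data_layer(name='input', size=100)
label = data_layer(name='label', size=10)
output = fc_layer(input=data, size=10, act=SoftmaxActivation())

# Evaluators register themselves with the config as a side effect of the call.
classification_error_evaluator(input=output, label=label)
outputs(classification_cost(input=output, label=label))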
@@ -21,16 +21,18 @@ from paddle.trainer.config_parser import logger

 __all__ = []


 def register_unary_math_op(op_name, act):
+
     def op(input, name=None):
-        return mixed_layer(input=[identity_projection(input=input)],
-                           name=name,
-                           act=act)
+        return mixed_layer(
+            input=[identity_projection(input=input)], name=name, act=act)

     op = wrap_name_default(op_name)(op)
     op.__doc__ = type(act).__doc__
     globals()[op_name] = op
     __all__.append(op_name)


 register_unary_math_op('exp', act.ExpActivation())
 register_unary_math_op('log', act.LogActivation())
 register_unary_math_op('abs', act.AbsActivation())
@@ -38,6 +40,7 @@ register_unary_math_op('sigmoid', act.SigmoidActivation())
 register_unary_math_op('tanh', act.TanhActivation())
 register_unary_math_op('square', act.SquareActivation())

+
 def add(layeroutput, other):
     if is_compatible_with(other, float):
         return slope_intercept_layer(input=layeroutput, intercept=other)
@@ -45,8 +48,10 @@ def add(layeroutput, other):
         logger.fatal("LayerOutput can only be added with"
                      " another LayerOutput or a number")
     if layeroutput.size == other.size:
-        return mixed_layer(input=[identity_projection(input=layeroutput),
-                           identity_projection(input=other)])
+        return mixed_layer(input=[
+            identity_projection(input=layeroutput),
+            identity_projection(input=other)
+        ])
     if other.size != 1 and layeroutput.size != 1:
         logger.fatal("Two LayerOutput can be added only if they have equal size"
                      " or one of their sizes is 1. sizes are %s and %s" %
@@ -56,12 +61,15 @@ def add(layeroutput, other):
         layeroutput = other
         other = tmp
     other = repeat_layer(other, layeroutput.size)
-    return mixed_layer(input=[identity_projection(input=layeroutput),
-                       identity_projection(input=other)])
+    return mixed_layer(input=[
+        identity_projection(input=layeroutput), identity_projection(input=other)
+    ])
+

 LayerOutput.__radd__ = add
 LayerOutput.__add__ = add
 def sub(layeroutput, other):
     if is_compatible_with(other, float):
         return slope_intercept_layer(input=layeroutput, intercept=other)
@@ -71,14 +79,18 @@ def sub(layeroutput, other):
     neg = slope_intercept_layer(input=other, slope=-1.0)
     return add(layeroutput, neg)

+
 LayerOutput.__sub__ = sub


 def rsub(layeroutput, other):
     neg = slope_intercept_layer(input=layeroutput, slope=-1.0)
     return add(neg, other)

+
 LayerOutput.__rsub__ = rsub


 def mul(layeroutput, other):
     if is_compatible_with(other, float):
         return slope_intercept_layer(input=layeroutput, slope=other)
@@ -93,5 +105,6 @@ def mul(layeroutput, other):
     logger.fatal("At least one of the operand of '*' must be a number"
                  " or a LayerOutput with size=1")

+
 LayerOutput.__mul__ = mul
 LayerOutput.__rmul__ = mul
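The overloads above let LayerOutput values be combined with plain Python arithmetic, as the math test config later in this diff exercises; a compact sketch:

from paddle.trainer_config_helpers import *
from paddle.trainer_config_helpers import math

x = data_layer(name='data', size=100)
y = x + 1        # lowered to slope_intercept_layer with intercept=1
y = 2 * y        # lowered to slope_intercept_layer with slope=2
y = math.exp(y)  # registered unary op: identity projection + ExpActivation
outputs(y)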
@@ -17,11 +17,12 @@ from paddle.trainer.config_parser import Settings, default_decay_rate, \

 from .default_decorators import wrap_param_default

-__all__ = ['Optimizer', 'BaseSGDOptimizer', 'MomentumOptimizer',
-           'AdamaxOptimizer', 'AdamOptimizer', 'AdaGradOptimizer',
-           'RMSPropOptimizer', 'DecayedAdaGradOptimizer',
-           'AdaDeltaOptimizer', 'BaseRegularization', 'L2Regularization',
-           'settings', 'ModelAverage']
+__all__ = [
+    'Optimizer', 'BaseSGDOptimizer', 'MomentumOptimizer', 'AdamaxOptimizer',
+    'AdamOptimizer', 'AdaGradOptimizer', 'RMSPropOptimizer',
+    'DecayedAdaGradOptimizer', 'AdaDeltaOptimizer', 'BaseRegularization',
+    'L2Regularization', 'settings', 'ModelAverage'
+]


 class Optimizer(object):
@@ -90,18 +91,15 @@ class MomentumOptimizer(BaseSGDOptimizer):
     :param sparse: with sparse support or not.
     :type sparse: bool
     """

     def extra_settings(self):
         default_momentum(self.momentum)

     def to_setting_kwargs(self):
         if self.sparse:
-            return {
-                'learning_method': 'sparse_momentum'
-            }
+            return {'learning_method': 'sparse_momentum'}
         else:
-            return {
-                'learning_method': 'momentum'
-            }
+            return {'learning_method': 'momentum'}

     def __init__(self, momentum=None, sparse=False):
         self.momentum = momentum
""" """
def to_setting_kwargs(self): def to_setting_kwargs(self):
return { return {'learning_method': 'adagrad'}
'learning_method': 'adagrad'
}
def __init__(self): def __init__(self):
pass pass
@@ -311,9 +307,7 @@ class L2Regularization(BaseRegularization):

     def to_setting_kwargs(self):
         if self.algorithm == 'owlqn':
-            return {
-                'l2weight': self.decay_rate
-            }
+            return {'l2weight': self.decay_rate}
         else:
             return dict()
@@ -330,7 +324,8 @@ class ModelAverage(Optimizer):
             'do_average_in_cpu': self.do_average_in_cpu
         }

-    def __init__(self, average_window,
+    def __init__(self,
+                 average_window,
                  max_average_window=None,
                  do_average_in_cpu=False):
         self.average_window = average_window
@@ -356,10 +351,10 @@ def __extends__(dict1, dict2):
     return dict1


-@wrap_param_default(['learning_method'],
-                    default_factory=lambda _: MomentumOptimizer())
-@wrap_param_default(['regularization'],
-                    default_factory=lambda _: BaseRegularization())
+@wrap_param_default(
+    ['learning_method'], default_factory=lambda _: MomentumOptimizer())
+@wrap_param_default(
+    ['regularization'], default_factory=lambda _: BaseRegularization())
 def settings(batch_size,
              learning_rate=1e-3,
              learning_rate_decay_a=0.,
@@ -373,8 +368,7 @@ def settings(batch_size,
              regularization=None,
              is_async=False,
              model_average=None,
-             gradient_clipping_threshold=None
-             ):
+             gradient_clipping_threshold=None):
""" """
Set the optimization method, learning rate, batch size, and other training Set the optimization method, learning rate, batch size, and other training
settings. The currently supported algorithms are SGD and Async-SGD. settings. The currently supported algorithms are SGD and Async-SGD.
...@@ -415,10 +409,11 @@ def settings(batch_size, ...@@ -415,10 +409,11 @@ def settings(batch_size,
else: else:
algorithm = 'owlqn' algorithm = 'owlqn'
args=['batch_size', 'learning_rate', 'learning_rate_decay_a', args = [
'learning_rate_decay_b', 'learning_rate_schedule', 'batch_size', 'learning_rate', 'learning_rate_decay_a',
'learning_rate_args', 'average_window', 'do_average_in_cpu', 'learning_rate_decay_b', 'learning_rate_schedule', 'learning_rate_args',
'max_average_window'] 'average_window', 'do_average_in_cpu', 'max_average_window'
]
kwargs = dict() kwargs = dict()
kwargs['algorithm'] = algorithm kwargs['algorithm'] = algorithm
for arg in args: for arg in args:
......
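A sketch of a settings() call that overrides the decorated defaults above; the numeric values are arbitrary:

from paddle.trainer_config_helpers import *

settings(
    batch_size=128,
    learning_rate=1e-3,
    learning_method=AdamOptimizer(),
    regularization=L2Regularization(8e-4))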
@@ -11,18 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """
 """

 __all__ = [
-    "BasePoolingType",
-    "MaxPooling",
-    "AvgPooling",
-    "CudnnMaxPooling",
-    "CudnnAvgPooling",
-    "SumPooling",
-    "SquareRootNPooling"
+    "BasePoolingType", "MaxPooling", "AvgPooling", "CudnnMaxPooling",
+    "CudnnAvgPooling", "SumPooling", "SquareRootNPooling"
 ]
@@ -36,6 +30,7 @@ class BasePoolingType(object):
     :type name: basestring
     """
+
     def __init__(self, name):
         self.name = name
@@ -54,6 +49,7 @@ class MaxPooling(BasePoolingType):
                              value.  None means use default value in proto.
     :type output_max_index: bool|None
     """
+
     def __init__(self, output_max_index=None):
         BasePoolingType.__init__(self, "max")
         self.output_max_index = output_max_index
@@ -64,6 +60,7 @@ class CudnnMaxPooling(BasePoolingType):
     Cudnn max pooling only support GPU. Return the maxinum value in the
     pooling window.
     """
+
     def __init__(self):
         BasePoolingType.__init__(self, "cudnn-max-pool")
@@ -73,9 +70,11 @@ class CudnnAvgPooling(BasePoolingType):
     Cudnn average pooling only support GPU. Return the average value in the
     pooling window.
     """
+
     def __init__(self):
         BasePoolingType.__init__(self, "cudnn-avg-pool")


 class AvgPooling(BasePoolingType):
     """
     Average pooling.
@@ -105,7 +104,9 @@ class SumPooling(AvgPooling):
     sum(samples\\_of\\_a\\_sequence)
     """

-    def __init__(self): AvgPooling.__init__(self, AvgPooling.STRATEGY_SUM)
+    def __init__(self):
+        AvgPooling.__init__(self, AvgPooling.STRATEGY_SUM)


 class SquareRootNPooling(AvgPooling):
@@ -118,4 +119,6 @@ class SquareRootNPooling(AvgPooling):
     sum(samples\\_of\\_a\\_sequence)/sqrt(sample\\_num)
     """

-    def __init__(self): AvgPooling.__init__(self, AvgPooling.STRATEGY_SQROOTN)
+    def __init__(self):
+        AvgPooling.__init__(self, AvgPooling.STRATEGY_SQROOTN)
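These pooling types are passed to pooling layers much like activations; a short sketch (the sequence-pooling wiring is illustrative):

from paddle.trainer_config_helpers import *

seq = data_layer(name='seq', size=100)
seq_avg = pooling_layer(input=seq, pooling_type=AvgPooling())
seq_max = pooling_layer(input=seq, pooling_type=MaxPooling())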
 from paddle.trainer_config_helpers import *

-settings(
-    learning_rate=1e-3,
-    batch_size=1000
-)
+settings(learning_rate=1e-3, batch_size=1000)

-img = data_layer(name='image', size=256*256)
+img = data_layer(name='image', size=256 * 256)

 # the parse_conv in config_parse.py is not strictly accurate when filter_size
 # is not square. So here set square filter_size.
-img_conv = img_conv_layer(input=img, num_channels=1, num_filters=64,
-                          filter_size=(32, 32), padding=(1, 1), stride=(1, 1),
-                          act=LinearActivation())
+img_conv = img_conv_layer(
+    input=img,
+    num_channels=1,
+    num_filters=64,
+    filter_size=(32, 32),
+    padding=(1, 1),
+    stride=(1, 1),
+    act=LinearActivation())
 img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())
@@ -18,5 +20,4 @@ img_norm = img_cmrnorm_layer(input=img_bn, size=32)
 img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling())

 outputs(img_pool, img_norm)
-
 from paddle.trainer_config_helpers import *

-settings(
-    learning_rate=1e-3,
-    batch_size=1000
-)
+settings(learning_rate=1e-3, batch_size=1000)

-img = data_layer(name='image', size=227*227)
+img = data_layer(name='image', size=227 * 227)

 # the parse_conv in config_parse.py is not strictly accurate when filter_size
 # is not square. So here set square filter_size.
-img_conv = img_conv_layer(input=img, num_channels=1, num_filters=64,
-                          filter_size=(32, 32), padding=(1, 1), stride=(1, 1),
-                          act=LinearActivation(), trans=True)
+img_conv = img_conv_layer(
+    input=img,
+    num_channels=1,
+    num_filters=64,
+    filter_size=(32, 32),
+    padding=(1, 1),
+    stride=(1, 1),
+    act=LinearActivation(),
+    trans=True)
 img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())

 img_norm = img_cmrnorm_layer(input=img_bn, size=32)
 img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling())

 outputs(img_pool, img_norm)
 from paddle.trainer_config_helpers import *

-settings(
-    batch_size=1000,
-    learning_rate=1e-5
-)
+settings(batch_size=1000, learning_rate=1e-5)

 din = data_layer(name='data', size=30)

-seq_op = [
-    first_seq,
-    last_seq
-]
+seq_op = [first_seq, last_seq]

-agg_level = [
-    AggregateLevel.EACH_SEQUENCE,
-    AggregateLevel.EACH_TIMESTEP
-]
+agg_level = [AggregateLevel.EACH_SEQUENCE, AggregateLevel.EACH_TIMESTEP]

 opts = []
...
@@ -4,18 +4,18 @@ Test all activations.

 from paddle.trainer_config_helpers import *

-settings(
-    learning_rate=1e-4,
-    batch_size=1000
-)
+settings(learning_rate=1e-4, batch_size=1000)

 din = data_layer(name='input', size=100)

 acts = [
     TanhActivation, SigmoidActivation, SoftmaxActivation, IdentityActivation,
     LinearActivation, ExpActivation, ReluActivation, BReluActivation,
-    SoftReluActivation, STanhActivation, AbsActivation, SquareActivation]
+    SoftReluActivation, STanhActivation, AbsActivation, SquareActivation
+]

-outputs(
-    [fc_layer(input=din, size=100, act=act(), name="layer_%d" % i) for i, act in
-     enumerate(acts)])
+outputs([
+    fc_layer(
+        input=din, size=100, act=act(), name="layer_%d" % i)
+    for i, act in enumerate(acts)
+])
 from paddle.trainer_config_helpers import *
 from paddle.trainer_config_helpers import math

-settings(
-    batch_size=1000,
-    learning_rate=1e-5
-)
+settings(batch_size=1000, learning_rate=1e-5)

 x = data_layer(name='data', size=100)
 x = math.exp(x)
@@ -21,10 +18,9 @@ y = y - 2
 y = 2 - y
 y = 2 * y
 y = y * 3
-z= data_layer(name='data_2', size=1)
+z = data_layer(name='data_2', size=1)
 y = y * z
 y = z * y
 y = y + z
 y = z + y
 outputs(y)
...@@ -3,10 +3,7 @@ Test mixed layer, projections and operators.
'''
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-4)
din = data_layer(name='test', size=100)
...@@ -30,18 +27,20 @@ with mixed_layer() as m5:
with mixed_layer() as m6:
    m6 += dotmul_operator(a=m3, b=m4)
img = data_layer(name='img', size=32 * 32)
flt = data_layer(name='filter', size=3 * 3 * 1 * 64)
with mixed_layer() as m7:
    m7 += conv_operator(
        img=img, filter=flt, num_filters=64, num_channels=1, filter_size=3)
end = mixed_layer(
    input=[
        full_matrix_projection(input=m5),
        trans_full_matrix_projection(input=m6), full_matrix_projection(input=m7)
    ],
    size=100,
    layer_attr=ExtraAttr(
        drop_rate=0.5, error_clipping_threshold=40))
outputs(end)
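mixed_layer sums the projections it is given, so the block-form end above could equally be written in the incremental += style used for m6 and m7; a sketch, equivalent under that assumption (end2 is illustrative):

with mixed_layer(
        size=100,
        layer_attr=ExtraAttr(
            drop_rate=0.5, error_clipping_threshold=40)) as end2:
    end2 += full_matrix_projection(input=m5)
    end2 += trans_full_matrix_projection(input=m6)
    end2 += full_matrix_projection(input=m7)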
from paddle.trainer_config_helpers import *
settings(learning_rate=1e-4, batch_size=1000)
a = data_layer(name='feature_a', size=200)
b = data_layer(name='feature_b', size=200)
...@@ -11,12 +8,22 @@ b = data_layer(name='feature_b', size=200)
fc_param = ParamAttr(name='fc_param', initial_max=1.0, initial_min=-1.0)
bias_param = ParamAttr(name='bias_param', initial_mean=0.0, initial_std=0.0)
softmax_param = ParamAttr(
    name='softmax_param', initial_max=1.0, initial_min=-1.0)
hidden_a = fc_layer(
    input=a, size=200, param_attr=fc_param, bias_attr=bias_param)
hidden_b = fc_layer(
    input=b, size=200, param_attr=fc_param, bias_attr=bias_param)
predict = fc_layer(
    input=[hidden_a, hidden_b],
    param_attr=[softmax_param, softmax_param],
    bias_attr=False,
    size=10,
    act=SoftmaxActivation())
outputs(
    classification_cost(
        input=predict, label=data_layer(
            name='label', size=10)))
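The weight sharing hinges on the ParamAttr names: both hidden layers reference 'fc_param' and 'bias_param', so they train one 200x200 matrix and one bias vector, and the two softmax inputs share 'softmax_param' the same way. A minimal sketch extending the idiom to a third input (the 'feature_c' layer is hypothetical):

c = data_layer(name='feature_c', size=200)  # hypothetical third input
hidden_c = fc_layer(
    input=c, size=200, param_attr=fc_param, bias_attr=bias_param)  # reuses the shared parameters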
...@@ -16,14 +16,26 @@ with mixed_layer(size=400, bias_attr=False) as m2:
lstm_param = ParamAttr(name='lstm_param')
lstm_bias = ParamAttr(name='lstm_bias', initial_mean=0., initial_std=0.)
lstm1 = lstmemory_group(
    input=m1,
    param_attr=lstm_param,
    lstm_bias_attr=lstm_bias,
    mixed_bias_attr=False)
lstm2 = lstmemory_group(
    input=m2,
    param_attr=lstm_param,
    lstm_bias_attr=lstm_bias,
    mixed_bias_attr=False)
softmax_param = ParamAttr(name='softmax_param')
predict = fc_layer(
    input=[last_seq(input=lstm1), last_seq(input=lstm2)],
    size=10,
    param_attr=[softmax_param, softmax_param],
    bias_attr=False,
    act=SoftmaxActivation())
outputs(
    classification_cost(
        input=predict, label=data_layer(
            name='label', size=10)))
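Here the sharing extends to recurrent weights: both lstmemory_group calls name 'lstm_param' and 'lstm_bias', so the two sequences run through one LSTM. last_seq then keeps only each sequence's final state for the classifier; if mean-over-time features were wanted instead, a pooling layer would be the likely substitute. A hedged sketch, assuming pooling_layer and AvgPooling from the same helpers:

avg1 = pooling_layer(input=lstm1, pooling_type=AvgPooling())  # assumed API, mirroring the MaxPooling usage above
avg2 = pooling_layer(input=lstm2, pooling_type=AvgPooling())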
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-4)
din = data_layer(name='data', size=200)
...@@ -13,24 +10,28 @@ rnn = recurrent_layer(input=hidden, act=SigmoidActivation())
rnn2 = recurrent_layer(input=hidden, act=SigmoidActivation(), reverse=True)
lstm1_param = fc_layer(
    input=hidden, size=200 * 4, act=LinearActivation(), bias_attr=False)
lstm1 = lstmemory(input=lstm1_param, act=SigmoidActivation())
lstm2_param = fc_layer(
    input=hidden, size=200 * 4, act=LinearActivation(), bias_attr=False)
lstm2 = lstmemory(input=lstm2_param, act=SigmoidActivation(), reverse=True)
gru1_param = fc_layer(
    input=hidden, size=200 * 3, act=LinearActivation(), bias_attr=False)
gru1 = grumemory(input=gru1_param, act=SigmoidActivation())
gru2_param = fc_layer(
    input=hidden, size=200 * 3, act=LinearActivation(), bias_attr=False)
gru2 = grumemory(input=gru2_param, act=SigmoidActivation(), reverse=True)
outputs(
    last_seq(input=rnn),
    first_seq(input=rnn2),
    last_seq(input=lstm1),
    first_seq(input=lstm2),
    last_seq(input=gru1),
    first_seq(gru2))
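The 200 * 4 and 200 * 3 projections are not arbitrary: lstmemory consumes four gate pre-activations per hidden unit and grumemory three, so the fc_layer feeding each memory must be sized accordingly. To consume a forward/backward pair jointly rather than emitting the directions separately, the two endpoints could be concatenated; a sketch assuming concat_layer from the same helpers:

bi_lstm = concat_layer(input=[last_seq(input=lstm1), first_seq(input=lstm2)])  # assumed helper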
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-4)
din = data_layer(name='data', size=120)
......
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-5)
data = data_layer(name='data', size=2304)
conv = img_conv_layer(
    input=data,
    filter_size=3,
    num_channels=1,
    num_filters=16,
    padding=1,
    act=LinearActivation(),
    bias_attr=True)
bilinear = bilinear_interp_layer(input=conv, out_size_x=64, out_size_y=64)
pool = img_pool_layer(
    input=bilinear,
    num_channels=4,
    pool_size=2,
    stride=2,
......
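For size bookkeeping: data size 2304 is 48 * 48 with one channel; the 3x3 convolution with padding=1 (and, assuming the default stride of 1) preserves 48x48, bilinear_interp_layer rescales that to 64x64, and the 2x2 stride-2 pool, whose call is truncated above, would halve it to 32x32. The arithmetic, using the usual output-size formula out = (in + 2*padding - filter) / stride + 1:

def conv_out(in_size, filter_size, padding, stride):
    # standard convolution/pooling output-size formula
    return (in_size + 2 * padding - filter_size) // stride + 1

assert conv_out(48, 3, 1, 1) == 48  # the conv above preserves spatial size
assert conv_out(64, 2, 0, 2) == 32  # the 2x2/stride-2 pool halves 64x64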