提交 6605f3f3 编写于 作者: E emailweixu 提交者: GitHub

Merge pull request #2400 from emailweixu/param_initializer

Parameter initializer in V2 API
...@@ -126,6 +126,7 @@ def init_config_environment( ...@@ -126,6 +126,7 @@ def init_config_environment(
g_config=TrainerConfig(), g_config=TrainerConfig(),
g_layer_map={}, g_layer_map={},
g_parameter_map={}, g_parameter_map={},
g_parameter_initializer_map={},
g_extended_config_funcs={}, g_extended_config_funcs={},
# store command args of paddle_trainer # store command args of paddle_trainer
...@@ -439,22 +440,22 @@ def model_type(name): ...@@ -439,22 +440,22 @@ def model_type(name):
@config_class @config_class
class Bias(Cfg): class Bias(Cfg):
def __init__( def __init__(self,
self, parameter_name=None,
parameter_name=None, learning_rate=None,
learning_rate=None, momentum=None,
momentum=None, decay_rate=None,
decay_rate=None, decay_rate_l1=None,
decay_rate_l1=None, initial_mean=None,
initial_mean=None, initial_std=None,
initial_std=None, initial_strategy=None,
initial_strategy=None, initial_smart=None,
initial_smart=None, num_batches_regularization=None,
num_batches_regularization=None, sparse_remote_update=None,
sparse_remote_update=None, gradient_clipping_threshold=None,
gradient_clipping_threshold=None, is_static=None,
is_static=None, is_shared=None,
is_shared=None, ): initializer=None):
self.add_keys(locals()) self.add_keys(locals())
...@@ -465,6 +466,7 @@ class Input(Cfg): ...@@ -465,6 +466,7 @@ class Input(Cfg):
self, self,
input_layer_name, input_layer_name,
parameter_name=None, parameter_name=None,
initializer=None,
learning_rate=None, learning_rate=None,
momentum=None, momentum=None,
decay_rate=None, decay_rate=None,
...@@ -521,6 +523,7 @@ class Projection(Input): ...@@ -521,6 +523,7 @@ class Projection(Input):
initial_std=None, initial_std=None,
initial_strategy=None, initial_strategy=None,
initial_smart=None, initial_smart=None,
initializer=None,
num_batches_regularization=None, num_batches_regularization=None,
sparse_remote_update=None, sparse_remote_update=None,
sparse_update=None, sparse_update=None,
...@@ -1479,7 +1482,8 @@ class LayerBase(object): ...@@ -1479,7 +1482,8 @@ class LayerBase(object):
gradient_clipping_threshold=bias. gradient_clipping_threshold=bias.
gradient_clipping_threshold, gradient_clipping_threshold,
is_static=bias.is_static, is_static=bias.is_static,
is_shared=bias.is_shared, ) is_shared=bias.is_shared,
initializer=bias.initializer)
if for_self: if for_self:
self.config.bias_parameter_name = bias.parameter_name self.config.bias_parameter_name = bias.parameter_name
else: else:
...@@ -1536,7 +1540,8 @@ class LayerBase(object): ...@@ -1536,7 +1540,8 @@ class LayerBase(object):
format=format, format=format,
is_static=input_config.is_static, is_static=input_config.is_static,
is_shared=input_config.is_shared, is_shared=input_config.is_shared,
update_hooks=input_config.update_hooks) update_hooks=input_config.update_hooks,
initializer=input_config.initializer)
def set_layer_size(self, size): def set_layer_size(self, size):
if self.config.size == 0: if self.config.size == 0:
...@@ -3221,7 +3226,8 @@ def Parameter(name, ...@@ -3221,7 +3226,8 @@ def Parameter(name,
need_compact=None, need_compact=None,
is_static=None, is_static=None,
is_shared=None, is_shared=None,
update_hooks=None): update_hooks=None,
initializer=None):
config_assert(name not in g_parameter_map, config_assert(name not in g_parameter_map,
'Duplicated parameter name: ' + name) 'Duplicated parameter name: ' + name)
...@@ -3309,6 +3315,11 @@ def Parameter(name, ...@@ -3309,6 +3315,11 @@ def Parameter(name,
para.update_hooks.extend(update_hooks) para.update_hooks.extend(update_hooks)
g_parameter_map[name] = para g_parameter_map[name] = para
if initializer is not None:
config_assert(
callable(initializer),
"parameter initializer should be a callable object")
g_parameter_initializer_map[name] = initializer
@config_func @config_func
......
...@@ -95,6 +95,10 @@ class ParameterAttribute(object): ...@@ -95,6 +95,10 @@ class ParameterAttribute(object):
:param sparse_update: Enable sparse update for this parameter. It will :param sparse_update: Enable sparse update for this parameter. It will
enable both local and remote sparse update. enable both local and remote sparse update.
:type sparse_update: bool :type sparse_update: bool
:param initializer: If not None, it should be a callable object which accepts
a parameter name and returns a numpy array holding the initial
value of the parameter.
:type initializer: callable object
""" """
def __init__(self, def __init__(self,
...@@ -109,7 +113,8 @@ class ParameterAttribute(object): ...@@ -109,7 +113,8 @@ class ParameterAttribute(object):
learning_rate=None, learning_rate=None,
momentum=None, momentum=None,
gradient_clipping_threshold=None, gradient_clipping_threshold=None,
sparse_update=False): sparse_update=False,
initializer=None):
self.attr = {} self.attr = {}
if is_static: if is_static:
...@@ -161,6 +166,8 @@ class ParameterAttribute(object): ...@@ -161,6 +166,8 @@ class ParameterAttribute(object):
is_compatible_with(gradient_clipping_threshold, float): is_compatible_with(gradient_clipping_threshold, float):
self.attr['gradient_clipping_threshold'] = \ self.attr['gradient_clipping_threshold'] = \
gradient_clipping_threshold gradient_clipping_threshold
if initializer is not None:
self.attr['initializer'] = initializer
def set_default_parameter_name(self, name): def set_default_parameter_name(self, name):
""" """
......
import numpy as np import numpy as np
import py_paddle.swig_paddle as api import py_paddle.swig_paddle as api
from paddle.proto.ParameterConfig_pb2 import ParameterConfig from paddle.proto.ParameterConfig_pb2 import ParameterConfig
import paddle.trainer.config_parser as cp
import struct import struct
import tarfile import tarfile
import cStringIO import cStringIO
...@@ -18,8 +19,11 @@ def create(layers): ...@@ -18,8 +19,11 @@ def create(layers):
""" """
topology = Topology(layers) topology = Topology(layers)
pool = Parameters() pool = Parameters()
initializers = cp.g_parameter_initializer_map
for param in topology.proto().parameters: for param in topology.proto().parameters:
pool.__append_config__(param) pool.__append_config__(param)
if param.name in initializers:
pool[param.name] = initializers[param.name](param.name)
return pool return pool
......
...@@ -11,6 +11,9 @@ except ImportError: ...@@ -11,6 +11,9 @@ except ImportError:
sys.exit(0) sys.exit(0)
import paddle.v2.parameters as parameters import paddle.v2.parameters as parameters
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer
from paddle.v2.attr import ParamAttr
from paddle.proto.ParameterConfig_pb2 import ParameterConfig from paddle.proto.ParameterConfig_pb2 import ParameterConfig
import random import random
import cStringIO import cStringIO
...@@ -55,6 +58,25 @@ class TestParameters(unittest.TestCase): ...@@ -55,6 +58,25 @@ class TestParameters(unittest.TestCase):
p1 = params_dup.get(name) p1 = params_dup.get(name)
self.assertTrue(numpy.isclose(p0, p1).all()) self.assertTrue(numpy.isclose(p0, p1).all())
def test_initializer(self):
    """Check that a ``ParamAttr`` initializer callable seeds the parameter.

    Builds a bias-free fully connected layer (3 inputs, size 2) whose
    weight is named "fc.w", creates the parameter pool from it, and
    verifies that the stored value equals the matrix returned by the
    initializer.
    """

    def initializer(name):
        # The callable is invoked with the name of the parameter it seeds.
        self.assertEqual(name, "fc.w")
        mat = numpy.ones((3, 2), dtype=numpy.float32)
        mat[1, 1] = 2
        return mat

    x = layer.data(name="x", type=data_type.dense_vector(3))
    y = layer.fc(x,
                 size=2,
                 bias_attr=False,
                 param_attr=ParamAttr(
                     name="fc.w", initializer=initializer))
    params = parameters.create(y)
    val = params["fc.w"]

    # Use unittest assertions rather than bare ``assert`` statements so the
    # checks still run under ``python -O`` and failures are reported by the
    # test runner with the compared values.
    self.assertEqual(val.shape, (3, 2))
    expected = numpy.array([[1, 1], [1, 2], [1, 1]], numpy.float32)
    self.assertTrue(numpy.array_equal(val, expected))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册