diff --git a/python/paddle/v2/framework/framework.py b/python/paddle/v2/framework/framework.py
index f8d2f67410a6c06a1642180d2d62c881ec6bda3d..b3493fc378f4d866d16a6a0a739e0ccc8e44d97c 100644
--- a/python/paddle/v2/framework/framework.py
+++ b/python/paddle/v2/framework/framework.py
@@ -354,8 +354,8 @@ class Block(object):
 
     def create_var(self, *args, **kwargs):
         var = Variable(self, *args, **kwargs)
-        if 'init_attr' in kwargs:
-            self._prepend_initialize_ops_(var, kwargs['init_attr'])
+        if 'initializer' in kwargs:
+            kwargs['initializer'](var, self)
         return var
 
     def has_var(self, name):
@@ -364,8 +364,8 @@ class Block(object):
     def create_parameter(self, *args, **kwargs):
         global_block = self.program.global_block()
         param = Parameter(global_block, *args, **kwargs)
-        if 'init_attr' in kwargs:
-            self._prepend_initialize_ops_(param, kwargs['init_attr'])
+        if 'initializer' in kwargs:
+            kwargs['initializer'](param, self)
         return param
 
     def append_op(self, *args, **kwargs):
@@ -424,17 +424,6 @@ class Block(object):
         for index in range(len(self.ops)):
             assert self.ops[index].desc == ops_in_cpp[index]
 
-    def _prepend_initialize_ops_(self, param, init_attr):
-        op_type = init_attr['type']
-        init_attr['shape'] = param.shape
-        init_attr['data_type'] = int(param.data_type)
-        op = self.prepend_op(
-            type=op_type,
-            inputs=None,
-            outputs={'Out': [param]},
-            attrs=init_attr)
-        param.op = op
-
 
 class Program(object):
     def __init__(self):
diff --git a/python/paddle/v2/framework/initializer.py b/python/paddle/v2/framework/initializer.py
new file mode 100644
index 0000000000000000000000000000000000000000..377d33271355651e6dbda0d31da0692f1a4c883d
--- /dev/null
+++ b/python/paddle/v2/framework/initializer.py
@@ -0,0 +1,109 @@
+import paddle.v2.framework.framework as framework
+
+__all__ = ['ConstantInitializer', 'UniformInitializer']
+
+
+class Initializer(object):
+    """Base class for variable initializers
+
+    Defines the common interface of variable initializers.
+    They add operations to the init program that are used
+    to initialize variables. Users should not use this class
+    directly, but should instead use one of its implementations.
+ """ + + def __init_(self): + pass + + def __call__(self, param, block): + """Add corresponding initialization operations to the network + """ + raise NotImplementedError() + + +class ConstantInitializer(Initializer): + """Implements the constant initializer + """ + + def __init__(self, value=0.0): + """Constructor for ConstantInitializer + + Args: + value: constant value to initialize the variable + """ + assert value is not None + super(ConstantInitializer, self).__init__() + self._value = value + + def __call__(self, var, block): + """Add constant initialization ops for a variable + + Args: + var: Variable that needs to be initialized + block: The block in which initialization ops + should be added + + Returns: + the initialization op + """ + assert isinstance(var, framework.Variable) + assert isinstance(block, framework.Block) + # Initialization Ops should be prepended and not appended + op = block.prepend_op( + type="fill_constant", + outputs={"Out": var}, + attrs={ + "shape": var.shape, + "data_type": int(var.data_type), + "value": self._value + }) + var.op = op + return op + + +class UniformInitializer(Initializer): + """Implements for random uniform distribution initializer + """ + + def __init__(self, low=-1.0, high=1.0, seed=0): + """Constructor for UniformInitializer + + Args: + low: lower boundary of the uniform distribution + high: upper boundary of the uniform distribution + seed: random seed + """ + assert low is not None + assert high is not None + assert seed is not None + super(UniformInitializer, self).__init__() + self._low = low + self._high = high + self._seed = seed + + def __call__(self, var, block): + """Add uniform distribution initialization ops for a variable + + Args: + var: Variable that needs to be initialized + block: The block in which initialization ops + should be added + + Returns: + the initialization op + """ + assert isinstance(var, framework.Variable) + assert isinstance(block, framework.Block) + # Initialization Ops should be prepended and not appended + op = block.prepend_op( + type="uniform_random", + outputs={"Out": var}, + attrs={ + "shape": var.shape, + "data_type": int(var.data_type), + "min": self._low, + "max": self._high, + "seed": self._seed + }) + var.op = op + return op diff --git a/python/paddle/v2/framework/layer_helper.py b/python/paddle/v2/framework/layer_helper.py index d96dbe172c22617182e7ebf4aab175c6142352b7..c57776441c32e39bfa93a9fdf09dba238359af7b 100644 --- a/python/paddle/v2/framework/layer_helper.py +++ b/python/paddle/v2/framework/layer_helper.py @@ -5,6 +5,8 @@ import paddle.v2.framework.core as core from paddle.v2.framework.framework import Variable, g_program, \ g_init_program +from paddle.v2.framework.initializer import ConstantInitializer, \ + UniformInitializer def unique_name(prefix): @@ -66,14 +68,7 @@ class LayerHelper(object): @property def param_attr(self): - default = { - 'name': None, - 'init_attr': { - 'type': 'uniform_random', - 'min': -1.0, - 'max': 1.0 - } - } + default = {'name': None, 'initializer': UniformInitializer()} actual = self.kwargs.get('param_attr', None) if actual is None: actual = default @@ -83,13 +78,7 @@ class LayerHelper(object): return actual def bias_attr(self): - default = { - 'name': None, - 'init_attr': { - 'type': 'fill_constant', - 'value': 0.0 - } - } + default = {'name': None, 'initializer': ConstantInitializer()} bias_attr = self.kwargs.get('bias_attr', None) if bias_attr is True: bias_attr = default diff --git a/python/paddle/v2/framework/layers.py 
index 5fdad52f2124da4568f685851d12d3660dcac78f..dab72f0195fd9f7ec8219be7867e8ddde9036db9 100644
--- a/python/paddle/v2/framework/layers.py
+++ b/python/paddle/v2/framework/layers.py
@@ -1,6 +1,7 @@
 from paddle.v2.framework.layer_helper import LayerHelper, unique_name
 import paddle.v2.framework.core as core
 from paddle.v2.framework.framework import OpProtoHolder, Variable, Program
+from paddle.v2.framework.initializer import ConstantInitializer
 import re
 
 __all__ = [
@@ -440,26 +441,12 @@
     else:
         raise ValueError("unsupported data layout:" + data_layout)
 
-    def get_init_attr(value):
-        if not isinstance(value, float):
-            raise ValueError("attr value should be a float")
-        return {'type': 'fill_constant', 'value': value}
-
-    def prepend_init_op(var, init_attr):
-        assert isinstance(var, Variable)
-        op_type = init_attr['type']
-        init_attr['shape'] = var.shape
-        init_attr['data_type'] = int(var.data_type)
-        op = var.block.prepend_op(
-            type=op_type, inputs=None, outputs={'Out': [var]}, attrs=init_attr)
-        return op
-
-    def create_persistable_var(dtype, shape, init_attr=None):
+    def create_persistable_var(dtype, shape, initializer=None):
         name = unique_name(".".join([helper.name, "xxxx"]))
         var = init_program.global_block().create_var(
             dtype=dtype, shape=shape, name=name, persistable=True)
-        if 'init_attr' is not None:
-            prepend_init_op(var, init_attr)
+        if initializer is not None:
+            initializer(var, var.block)
         return program.global_block().create_var(
             name=name, dtype=dtype, shape=shape, persistable=True)
 
@@ -472,8 +459,9 @@
         attr=helper.param_attr, shape=param_shape, dtype=dtype)
 
     # create input
-    mean = create_persistable_var(dtype, param_shape, get_init_attr(0.0))
-    variance = create_persistable_var(dtype, param_shape, get_init_attr(1.0))
+    mean = create_persistable_var(dtype, param_shape, ConstantInitializer(0.0))
+    variance = create_persistable_var(dtype, param_shape,
+                                      ConstantInitializer(1.0))
 
     # create output
     # mean and mean_out share the same memory
diff --git a/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py b/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py
index a8a34b2a952c8d374089ab8142b530610b2afe59..9916569d042d5d5077732281487ca3ae4366b39c 100644
--- a/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py
+++ b/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py
@@ -3,9 +3,10 @@ import paddle.v2.framework.layers as layers
 import paddle.v2.framework.core as core
 import paddle.v2.framework.optimizer as optimizer
 
-from paddle.v2.framework.framework import Program, g_program
+from paddle.v2.framework.framework import Program
 from paddle.v2.framework.executor import Executor
 from paddle.v2.framework.regularizer import L2DecayRegularizer
+from paddle.v2.framework.initializer import UniformInitializer
 
 import numpy as np
 
@@ -21,11 +22,8 @@ image = layers.data(
 
 param_attr = {
     'name': None,
-    'init_attr': {
-        'type': 'uniform_random',
-        'min': -1.0,
-        'max': 1.0
-    },
+    'initializer': UniformInitializer(
+        low=-1.0, high=1.0),
     'regularization': L2DecayRegularizer(0.0005 * BATCH_SIZE)
 }
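
Taken together, the patch replaces the old `init_attr` dicts with initializer objects: an initializer is constructed once (e.g. `UniformInitializer(low=-1.0, high=1.0)`) and later called as `initializer(var, block)`, at which point it prepends a `fill_constant` or `uniform_random` op to the block. The sketch below is a minimal usage example assembled only from calls visible in this diff (`Program`, `global_block`, `create_var`, the two initializer constructors, and the `param_attr`/`bias_attr` dicts); the variable name, shape, and `"float32"` dtype are illustrative assumptions, not part of the patch, and running it requires a built `paddle.v2.framework` package.

```python
import paddle.v2.framework.framework as framework
from paddle.v2.framework.initializer import ConstantInitializer, \
    UniformInitializer

# Hypothetical program and variable; the name, shape and dtype below
# are placeholders, not values taken from the patch.
init_program = framework.Program()
block = init_program.global_block()
var = block.create_var(
    name="fc.w", shape=[784, 100], dtype="float32", persistable=True)

# An initializer is a callable: it prepends its init op (here uniform_random)
# to the given block, mirroring create_persistable_var in layers.py.
UniformInitializer(low=-1.0, high=1.0, seed=0)(var, block)

# Layers receive the same objects through attribute dicts, as in
# test_recognize_digits_mlp.py and the LayerHelper defaults.
param_attr = {
    'name': None,
    'initializer': UniformInitializer(low=-1.0, high=1.0)
}
bias_attr = {'name': None, 'initializer': ConstantInitializer(0.0)}
```

One design consequence visible in the diff: `Block` no longer needs the `_prepend_initialize_ops_` helper, and `batch_norm` drops its local `get_init_attr`/`prepend_init_op` functions, since the op-construction details now live in the initializer classes themselves.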