Commit e41f28cb authored by Abhinav Arora, committed by zchen0211

Adding a framework for variable initializers (#5232)

Parent: ddde829a
@@ -354,8 +354,8 @@ class Block(object):
     def create_var(self, *args, **kwargs):
         var = Variable(self, *args, **kwargs)
-        if 'init_attr' in kwargs:
-            self._prepend_initialize_ops_(var, kwargs['init_attr'])
+        if 'initializer' in kwargs:
+            kwargs['initializer'](var, self)
         return var

     def has_var(self, name):
@@ -364,8 +364,8 @@ class Block(object):
     def create_parameter(self, *args, **kwargs):
         global_block = self.program.global_block()
         param = Parameter(global_block, *args, **kwargs)
-        if 'init_attr' in kwargs:
-            self._prepend_initialize_ops_(param, kwargs['init_attr'])
+        if 'initializer' in kwargs:
+            kwargs['initializer'](param, self)
         return param

     def append_op(self, *args, **kwargs):
@@ -424,17 +424,6 @@ class Block(object):
         for index in range(len(self.ops)):
             assert self.ops[index].desc == ops_in_cpp[index]

-    def _prepend_initialize_ops_(self, param, init_attr):
-        op_type = init_attr['type']
-        init_attr['shape'] = param.shape
-        init_attr['data_type'] = int(param.data_type)
-        op = self.prepend_op(
-            type=op_type,
-            inputs=None,
-            outputs={'Out': [param]},
-            attrs=init_attr)
-        param.op = op
-

 class Program(object):
     def __init__(self):
...
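The change above replaces the private _prepend_initialize_ops_ helper with a callback convention: whatever object is passed as initializer is simply called as initializer(var, block) and is expected to prepend the corresponding fill op itself. A minimal, self-contained sketch of that convention follows; the Stub* classes are illustration-only stand-ins, not the real paddle.v2.framework classes.

# Illustrative stubs only -- not the real paddle.v2.framework classes.
class StubVariable(object):
    def __init__(self, name, shape):
        self.name = name
        self.shape = shape
        self.op = None  # will point at the op that initializes this variable


class StubBlock(object):
    def __init__(self):
        self.ops = []

    def prepend_op(self, type, outputs, attrs):
        # Initialization ops are prepended so they run before the rest of the block.
        op = {'type': type, 'outputs': outputs, 'attrs': attrs}
        self.ops.insert(0, op)
        return op

    def create_parameter(self, name, shape, **kwargs):
        param = StubVariable(name, shape)
        if 'initializer' in kwargs:
            # Same dispatch as Block.create_parameter in the hunk above.
            kwargs['initializer'](param, self)
        return param


class StubConstantInitializer(object):
    def __init__(self, value=0.0):
        self._value = value

    def __call__(self, var, block):
        op = block.prepend_op(
            type='fill_constant',
            outputs={'Out': [var]},
            attrs={'shape': var.shape, 'value': self._value})
        var.op = op
        return op


block = StubBlock()
w = block.create_parameter('w', shape=[784, 100],
                           initializer=StubConstantInitializer(0.0))
print(block.ops[0]['type'])  # fill_constant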
import paddle.v2.framework.framework as framework

__all__ = ['ConstantInitializer', 'UniformInitializer']


class Initializer(object):
    """Base class for variable initializers

    Defines the common interface of variable initializers.
    They add operations to the init program that are used
    to initialize variables. Users should not use this class
    directly, but need to use one of its implementations.
    """

    def __init__(self):
        pass

    def __call__(self, param, block):
        """Add corresponding initialization operations to the network
        """
        raise NotImplementedError()


class ConstantInitializer(Initializer):
    """Implements the constant initializer
    """

    def __init__(self, value=0.0):
        """Constructor for ConstantInitializer

        Args:
            value: constant value to initialize the variable
        """
        assert value is not None
        super(ConstantInitializer, self).__init__()
        self._value = value

    def __call__(self, var, block):
        """Add constant initialization ops for a variable

        Args:
            var: Variable that needs to be initialized
            block: The block in which initialization ops
                   should be added

        Returns:
            the initialization op
        """
        assert isinstance(var, framework.Variable)
        assert isinstance(block, framework.Block)
        # Initialization ops should be prepended and not appended
        op = block.prepend_op(
            type="fill_constant",
            outputs={"Out": var},
            attrs={
                "shape": var.shape,
                "data_type": int(var.data_type),
                "value": self._value
            })
        var.op = op
        return op


class UniformInitializer(Initializer):
    """Implements the random uniform distribution initializer
    """

    def __init__(self, low=-1.0, high=1.0, seed=0):
        """Constructor for UniformInitializer

        Args:
            low: lower boundary of the uniform distribution
            high: upper boundary of the uniform distribution
            seed: random seed
        """
        assert low is not None
        assert high is not None
        assert seed is not None
        super(UniformInitializer, self).__init__()
        self._low = low
        self._high = high
        self._seed = seed

    def __call__(self, var, block):
        """Add uniform distribution initialization ops for a variable

        Args:
            var: Variable that needs to be initialized
            block: The block in which initialization ops
                   should be added

        Returns:
            the initialization op
        """
        assert isinstance(var, framework.Variable)
        assert isinstance(block, framework.Block)
        # Initialization ops should be prepended and not appended
        op = block.prepend_op(
            type="uniform_random",
            outputs={"Out": var},
            attrs={
                "shape": var.shape,
                "data_type": int(var.data_type),
                "min": self._low,
                "max": self._high,
                "seed": self._seed
            })
        var.op = op
        return op
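For reference, callers pass an initializer object wherever the old init_attr dict was accepted. An illustrative snippet (assuming the paddle.v2.framework package from this commit is importable), mirroring the layer_helper defaults and the test below:

from paddle.v2.framework.initializer import ConstantInitializer, UniformInitializer

# Parameters default to a uniform random init, biases to constant 0.0;
# either can be overridden through the attr dicts:
param_attr = {'name': None, 'initializer': UniformInitializer(low=-1.0, high=1.0)}
bias_attr = {'name': None, 'initializer': ConstantInitializer(0.0)}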
@@ -5,6 +5,8 @@ import paddle.v2.framework.core as core
 from paddle.v2.framework.framework import Variable, g_program, \
     g_init_program
+from paddle.v2.framework.initializer import ConstantInitializer, \
+    UniformInitializer


 def unique_name(prefix):
@@ -66,14 +68,7 @@ class LayerHelper(object):
     @property
     def param_attr(self):
-        default = {
-            'name': None,
-            'init_attr': {
-                'type': 'uniform_random',
-                'min': -1.0,
-                'max': 1.0
-            }
-        }
+        default = {'name': None, 'initializer': UniformInitializer()}
         actual = self.kwargs.get('param_attr', None)
         if actual is None:
             actual = default
@@ -83,13 +78,7 @@ class LayerHelper(object):
         return actual

     def bias_attr(self):
-        default = {
-            'name': None,
-            'init_attr': {
-                'type': 'fill_constant',
-                'value': 0.0
-            }
-        }
+        default = {'name': None, 'initializer': ConstantInitializer()}
         bias_attr = self.kwargs.get('bias_attr', None)
         if bias_attr is True:
             bias_attr = default
...
 from paddle.v2.framework.layer_helper import LayerHelper, unique_name
 import paddle.v2.framework.core as core
 from paddle.v2.framework.framework import OpProtoHolder, Variable, Program
+from paddle.v2.framework.initializer import ConstantInitializer
 import re

 __all__ = [
@@ -440,26 +441,12 @@ def batch_norm(input,
     else:
         raise ValueError("unsupported data layout:" + data_layout)

-    def get_init_attr(value):
-        if not isinstance(value, float):
-            raise ValueError("attr value should be a float")
-        return {'type': 'fill_constant', 'value': value}
-
-    def prepend_init_op(var, init_attr):
-        assert isinstance(var, Variable)
-        op_type = init_attr['type']
-        init_attr['shape'] = var.shape
-        init_attr['data_type'] = int(var.data_type)
-        op = var.block.prepend_op(
-            type=op_type, inputs=None, outputs={'Out': [var]}, attrs=init_attr)
-        return op
-
-    def create_persistable_var(dtype, shape, init_attr=None):
+    def create_persistable_var(dtype, shape, initializer=None):
         name = unique_name(".".join([helper.name, "xxxx"]))
         var = init_program.global_block().create_var(
             dtype=dtype, shape=shape, name=name, persistable=True)
-        if 'init_attr' is not None:
-            prepend_init_op(var, init_attr)
+        if initializer is not None:
+            initializer(var, var.block)
         return program.global_block().create_var(
             name=name, dtype=dtype, shape=shape, persistable=True)
@@ -472,8 +459,9 @@ def batch_norm(input,
         attr=helper.param_attr, shape=param_shape, dtype=dtype)

     # create input
-    mean = create_persistable_var(dtype, param_shape, get_init_attr(0.0))
-    variance = create_persistable_var(dtype, param_shape, get_init_attr(1.0))
+    mean = create_persistable_var(dtype, param_shape, ConstantInitializer(0.0))
+    variance = create_persistable_var(dtype, param_shape,
+                                      ConstantInitializer(1.0))

     # create output
     # mean and mean_out share the same memory
...
@@ -3,9 +3,10 @@ import paddle.v2.framework.layers as layers
 import paddle.v2.framework.core as core
 import paddle.v2.framework.optimizer as optimizer
-from paddle.v2.framework.framework import Program, g_program
+from paddle.v2.framework.framework import Program
 from paddle.v2.framework.executor import Executor
 from paddle.v2.framework.regularizer import L2DecayRegularizer
+from paddle.v2.framework.initializer import UniformInitializer

 import numpy as np
@@ -21,11 +22,8 @@ image = layers.data(
 param_attr = {
     'name': None,
-    'init_attr': {
-        'type': 'uniform_random',
-        'min': -1.0,
-        'max': 1.0
-    },
+    'initializer': UniformInitializer(
+        low=-1.0, high=1.0),
     'regularization': L2DecayRegularizer(0.0005 * BATCH_SIZE)
 }
...