Commit 8e52b34a authored by Yu Yang, committed by GitHub

Implement FC layer with helper (#4726)

* Implement FC layer with helper

* Update LayerHelper

* Add debug string for Python ProtoBuf

and rename `Sync` to `Flush`

* Add check of ProtoBuf initialization

* Layer wrapper for FC

* Fix unittest

* Fix CI

* Add code generator

* AttributeChecker: better error log and specialize bool

Since many types can be cast to bool

* Complete mlp, fit_a_line
Parent 9fc59360
python/paddle/v2/framework/framework.py
@@ -153,7 +153,8 @@ class OpProtoHolder(object):
self.op_proto_map[proto.type] = proto
def get_op_proto(self, type):
assert type in self.op_proto_map, "Operator \"%s\" has not been registered." % type
if type not in self.op_proto_map:
raise ValueError("Operator \"%s\" has not been registered." % type)
return self.op_proto_map[type]
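The assert becomes a catchable ValueError; a hedged sketch of the behavior the updated TestOperator hunk below checks:

try:
    OpProtoHolder.instance().get_op_proto('no_such_op')
except ValueError as e:
    print e.message  # Operator "no_such_op" has not been registered.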
@@ -374,10 +375,10 @@ class Program(object):
cls._instance = cls()
return cls._instance
def __init__(self):
assert not hasattr(self.__class__,
'_instance'), 'Do not call constructor directly!'
self.desc = core.ProgramDesc.instance()
def __init__(self, desc=None):
if desc is None:
desc = core.ProgramDesc.instance()
self.desc = desc
self.blocks = [Block(self, 0)]
self.current_block_idx = 0
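The new optional desc argument lets callers build an isolated Program instead of the process-wide singleton; a sketch taken directly from the tests below:

pd = core.ProgramDesc.__create_program_desc__()
program = Program(desc=pd)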
@@ -428,7 +429,6 @@ class Parameter(Variable):
if each < 0:
raise ValueError("Parameter shape should not be related with "
"batch-size")
Variable.__init__(self, block, shape=shape, dtype=dtype, **kwargs)
self.trainable = kwargs.get('trainable', True)
self.init_attr = kwargs.get('initialize_attr', {
@@ -441,7 +441,7 @@ class Parameter(Variable):
self._append_initialize_ops_()
def _append_initialize_ops_(self):
attr = copy.deepcopy(self.init_attr)
attr = self.init_attr
op_type = attr.pop('type', None)
block = self.block
assert isinstance(block, Block)
New file: python/paddle/v2/framework/layer_helper.py
from paddle.v2.framework.framework import Variable, OpProtoHolder, g_program
import paddle.v2.framework.core as core
import copy
import itertools
def unique_name(prefix):
uid = core.unique_integer() # unique during whole process.
return "_".join([prefix, str(uid)])
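For reference, a hedged sketch of what unique_name yields; the exact suffixes depend on the process-wide counter behind core.unique_integer():

w_name = unique_name('fc')  # e.g. 'fc_0' (illustrative value)
b_name = unique_name('fc')  # e.g. 'fc_1', a fresh integer per call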
class LayerHelper(object):
def __init__(self, layer_type, **kwargs):
self.kwargs = kwargs
self.layer_type = layer_type
name = self.kwargs.get('name', None)
if name is None:
self.kwargs['name'] = unique_name(self.layer_type)
@property
def name(self):
return self.kwargs['name']
@property
def program(self):
prog = self.kwargs.get('program', None)
if prog is None:
return g_program
else:
return prog
def append_op(self, *args, **kwargs):
return self.program.current_block().append_op(*args, **kwargs)
def multiple_input(self, input_param_name='input'):
inputs = self.kwargs.get(input_param_name, [])
type_error = TypeError(
"Input of {0} layer should be a Variable or a sequence of Variables".
format(self.layer_type))
if isinstance(inputs, Variable):
inputs = [inputs]
elif not isinstance(inputs, list) and not isinstance(inputs, tuple):
raise type_error
else:
for each in inputs:
if not isinstance(each, Variable):
raise type_error
return inputs
def input(self, input_param_name='input'):
inputs = self.multiple_input(input_param_name)
if len(inputs) != 1:
raise ValueError(
"{0} layer only takes one input".format(self.layer_type))
return inputs[0]
@property
def param_attr(self):
default = {
'name': None,
'init_attr': {
'type': 'uniform_random',
'min': -1.0,
'max': 1.0
}
}
actual = self.kwargs.get('param_attr', None)
return actual if actual is not None else default
def bias_attr(self, size, dtype):
bias_attr = self.kwargs.get('bias_attr', False)
if bias_attr is None or bias_attr:
bias_attr = {
'name': None,
'init_attr': {
'type': 'fill_constant',
'value': 0.0,
'shape': [size],
'dataType': dtype
}
}
return bias_attr
def multiple_param_attr(self, length):
param_attr = self.param_attr
if isinstance(param_attr, dict):
param_attr = [param_attr]
if len(param_attr) != 1 and len(param_attr) != length:
raise ValueError("parameter number mismatch")
elif len(param_attr) == 1 and length != 1:
tmp = [None] * length
for i in xrange(length):
tmp[i] = copy.deepcopy(param_attr[0])
param_attr = tmp
return param_attr
def iter_inputs_and_params(self, input_param_name='input'):
inputs = self.multiple_input(input_param_name)
param_attrs = self.multiple_param_attr(len(inputs))
for ipt, param_attr in itertools.izip(inputs, param_attrs):
yield ipt, param_attr
def input_dtype(self, input_param_name='input'):
inputs = self.multiple_input(input_param_name)
dtype = None
for each in inputs:
if dtype is None:
dtype = each.data_type
elif dtype != each.data_type:
raise ValueError("Data Type mismatch")
return dtype
def create_parameter(self, attr, shape, dtype, suffix='w'):
if attr['name'] is None:
attr['name'] = unique_name(".".join([self.name, suffix]))
return self.program.global_block().create_parameter(
name=attr['name'],
dtype=dtype,
shape=shape,
initialize_attr=attr['init_attr'])
def create_tmp_variable(self, dtype):
return self.program.current_block().create_var(
name=unique_name(".".join([self.name, 'tmp'])), dtype=dtype)
def create_global_variable(self, *args, **kwargs):
return self.program.global_block().create_var(*args, **kwargs)
def append_bias_op(self, input_var):
bias_attr = self.bias_attr(
self.kwargs['size'], dtype=input_var.data_type)
if not bias_attr:
return input_var
b = self.create_parameter(
attr=bias_attr,
shape=[self.kwargs['size']],
dtype=input_var.data_type,
suffix='b')
tmp = self.create_tmp_variable(dtype=input_var.data_type)
self.append_op(
type='elementwise_add',
inputs={'X': [input_var],
'Y': [b]},
outputs={'Out': [tmp]})
return tmp
def append_activation(self, input_var):
act = self.kwargs.get('act', None)
if act is None:
return input_var
if isinstance(act, basestring):
act = {'type': act}
tmp = self.create_tmp_variable(dtype=input_var.data_type)
act_type = act.pop('type')
self.append_op(
type=act_type,
inputs={"X": [input_var]},
outputs={"Y": [tmp]},
attrs=act)
return tmp
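LayerHelper is meant to be the shared plumbing for every layer function. A minimal sketch of the intended pattern, assuming a registered `relu` operator with input 'X' and output 'Y' (`relu_layer` itself is a hypothetical name, mirroring fc_layer below):

def relu_layer(input, program=None):
    # Hypothetical layer built on LayerHelper.
    helper = LayerHelper('relu', **locals())
    out = helper.create_tmp_variable(dtype=input.data_type)
    helper.append_op(type='relu', inputs={'X': [input]}, outputs={'Y': [out]})
    return out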
New file: python/paddle/v2/framework/layers.py

from paddle.v2.framework.layer_helper import LayerHelper
import paddle.v2.framework.core as core
from paddle.v2.framework.framework import OpProtoHolder, Variable
import re
__all__ = ['fc_layer', 'data_layer', 'cross_entropy']
def fc_layer(input,
size,
param_attr=None,
bias_attr=True,
name=None,
act=None,
num_flatten_dims=1,
program=None):
# create helper
helper = LayerHelper('fc', **locals())
dtype = helper.input_dtype()
# mul
mul_results = []
for input_var, param_attr in helper.iter_inputs_and_params():
input_shape = input_var.shape
param_shape = list(input_shape[num_flatten_dims:]) + [size]
w = helper.create_parameter(
attr=param_attr, shape=param_shape, dtype=dtype)
tmp = helper.create_tmp_variable(dtype)
helper.append_op(
type="mul",
inputs={
"X": input_var,
"Y": w,
},
outputs={"Out": tmp},
attrs={'x_num_col_dims': num_flatten_dims})
mul_results.append(tmp)
# sum
if len(mul_results) == 1:
pre_bias = mul_results[0]
else:
pre_bias = helper.create_tmp_variable(dtype)
helper.append_op(
type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias})
# add bias
pre_activation = helper.append_bias_op(pre_bias)
# add activation
return helper.append_activation(pre_activation)
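A hedged usage sketch of fc_layer's two paths (`a` and `b` stand for pre-existing Variables with compatible shapes): a single input appends one mul, while a list of inputs appends one mul per input plus a sum, in both cases before the bias and activation.

h = fc_layer(input=a, size=128, act='relu')        # single mul
out = fc_layer(input=[a, b], size=64, act='relu')  # mul per input, then sum
y = fc_layer(input=a, size=10, bias_attr=False)    # skips the bias op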
def data_layer(name,
shape,
data_type='float32',
type=core.VarDesc.VarType.LOD_TENSOR,
program=None):
helper = LayerHelper('data', **locals())
shape = [-1] + shape # prepend batch size as -1
return helper.create_global_variable(
name=name, shape=shape, dtype=data_type, type=type)
def _convert_(name):
s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
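_convert_ rewrites the CamelCase input names from the C++ op proto into the snake_case keyword arguments the generated functions accept, for example:

# _convert_('X')          -> 'x'
# _convert_('MultiLevel') -> 'multi_level'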
def _create_op_func_(op_type):
op_proto = OpProtoHolder.instance().get_op_proto(op_type)
if len(op_proto.outputs) != 1:
raise ValueError(
"Only single-output operators can be automatically generated")
if op_proto.outputs[0].duplicable:
raise ValueError(
"Only non-duplicable operators can be automatically generated")
o_name = op_proto.outputs[0].name
def func(**kwargs):
helper = LayerHelper(op_type, **kwargs)
inputs = dict()
dtype = None
for ipt in op_proto.inputs:
name = _convert_(ipt.name)
val = kwargs.pop(name, [])
if not isinstance(val, list) and not isinstance(val, tuple):
val = [val]
for each in val:
if not isinstance(each, Variable):
raise ValueError("input of {0} must be a Variable".format(
op_type))
if dtype is None:
dtype = each.data_type
elif dtype != each.data_type:
raise ValueError(
"all inputs of {0} must have the same dtype".format(op_type))
inputs[ipt.name] = val
out = helper.create_tmp_variable(dtype=dtype)
helper.append_op(
type=op_type, inputs=inputs, outputs={o_name: [out]}, attrs=kwargs)
return out
func.__name__ = op_type
globals()[op_type] = func
global __all__
__all__.append(op_type)
_create_op_func_('mean')
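Each generated function takes the op's inputs as snake_case keywords and returns the single output Variable; remaining keyword arguments are forwarded as operator attributes. A hedged sketch (`cost` is assumed to be an existing Variable):

avg_cost = mean(x=cost)  # the proto input 'X' becomes the keyword 'x'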
def cross_entropy(input, label, **kwargs):
helper = LayerHelper('cross_entropy', **kwargs)
out = helper.create_tmp_variable(dtype=input.data_type)
helper.append_op(
type='cross_entropy',
inputs={'X': [input],
'Label': [label]},
outputs={'Y': [out]},
attrs=kwargs)
return out
def square_error_cost(input, label, **kwargs):
helper = LayerHelper('square_error_cost', **kwargs)
minus_out = helper.create_tmp_variable(dtype=input.data_type)
helper.append_op(
type='elementwise_sub',
inputs={'X': [input],
'Y': [label]},
outputs={'Out': [minus_out]})
square_out = helper.create_tmp_variable(dtype=input.data_type)
helper.append_op(
type='pow',
inputs={'X': [minus_out]},
outputs={'Y': [square_out]},
attrs={'factor': 2.0})
return square_out
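square_error_cost is roughly pow(input - label, 2.0) computed element-wise through two temporaries (an elementwise_sub, then pow with factor 2.0). A hedged usage sketch matching the fit-a-line test below, where y_predict and y are Variables:

cost = square_error_cost(input=y_predict, label=y)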
New file: unit tests for the new layers (class TestBook)

from paddle.v2.framework.layers import fc_layer, data_layer, cross_entropy, mean, square_error_cost
from paddle.v2.framework.framework import Program, g_program
import paddle.v2.framework.core as core
import unittest
class TestBook(unittest.TestCase):
def test_fit_a_line(self):
pd = core.ProgramDesc.__create_program_desc__()
program = Program(desc=pd)
x = data_layer(
name='x', shape=[13], data_type='float32', program=program)
y_predict = fc_layer(input=x, size=1, act=None, program=program)
y = data_layer(
name='y', shape=[1], data_type='float32', program=program)
cost = square_error_cost(input=y_predict, label=y, program=program)
avg_cost = mean(x=cost, program=program)
self.assertIsNotNone(avg_cost)
print str(program)
def test_recognize_digits_mlp(self):
pd = core.ProgramDesc.__create_program_desc__()
program = Program(desc=pd)
# These layers pass `program` explicitly rather than relying on g_program
images = data_layer(
name='pixel', shape=[784], data_type='float32', program=program)
label = data_layer(
name='label', shape=[1], data_type='int32', program=program)
hidden1 = fc_layer(input=images, size=128, act='relu', program=program)
hidden2 = fc_layer(input=hidden1, size=64, act='relu', program=program)
predict = fc_layer(
input=hidden2, size=10, act='softmax', program=program)
cost = cross_entropy(input=predict, label=label, program=program)
avg_cost = mean(x=cost, program=program)
self.assertIsNotNone(avg_cost)
print str(program)
if __name__ == '__main__':
unittest.main()
@@ -16,7 +16,7 @@ class TestOperator(unittest.TestCase):
try:
block.append_op(type="no_such_op")
self.fail()
except AssertionError as a_err:
except ValueError as a_err:
self.assertEqual(a_err.message,
"Operator \"no_such_op\" has not been registered.")