diff --git a/python/paddle/v2/framework/framework.py b/python/paddle/v2/framework/framework.py
index d649e69d58961dcc43e3bf7325b0b06c832245dc..a17f988bf433078bfb4c06dab57896e2648953ce 100644
--- a/python/paddle/v2/framework/framework.py
+++ b/python/paddle/v2/framework/framework.py
@@ -153,7 +153,8 @@ class OpProtoHolder(object):
         self.op_proto_map[proto.type] = proto

     def get_op_proto(self, type):
-        assert type in self.op_proto_map, "Operator \"%s\" has not been registered." % type
+        if type not in self.op_proto_map:
+            raise ValueError("Operator \"%s\" has not been registered." % type)
         return self.op_proto_map[type]


@@ -374,10 +375,10 @@ class Program(object):
             cls._instance = cls()
         return cls._instance

-    def __init__(self):
-        assert not hasattr(self.__class__,
-                           '_instance'), 'Do not call constructor directly!'
-        self.desc = core.ProgramDesc.instance()
+    def __init__(self, desc=None):
+        if desc is None:
+            desc = core.ProgramDesc.instance()
+        self.desc = desc
         self.blocks = [Block(self, 0)]
         self.current_block_idx = 0

@@ -428,7 +429,6 @@ class Parameter(Variable):
             if each < 0:
                 raise ValueError("Parameter shape should not be related with "
                                  "batch-size")
-
         Variable.__init__(self, block, shape=shape, dtype=dtype, **kwargs)
         self.trainable = kwargs.get('trainable', True)
         self.init_attr = kwargs.get('initialize_attr', {
@@ -441,7 +441,7 @@ class Parameter(Variable):
         self._append_initialize_ops_()

     def _append_initialize_ops_(self):
-        attr = copy.deepcopy(self.init_attr)
+        attr = self.init_attr
         op_type = attr.pop('type', None)
         block = self.block
         assert isinstance(block, Block)
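Editor's note: the framework.py changes above expose two behaviors to callers: Program can now be built around an explicit ProgramDesc, and looking up an unregistered operator raises a catchable ValueError instead of tripping an assert. A minimal sketch of both (the "no_such_op" name is just a placeholder, as in the updated test):

    import paddle.v2.framework.core as core
    from paddle.v2.framework.framework import Program, OpProtoHolder

    # Build a Program around a fresh ProgramDesc instead of the global singleton.
    desc = core.ProgramDesc.__create_program_desc__()
    program = Program(desc=desc)

    # Unregistered operator types now surface as ValueError.
    try:
        OpProtoHolder.instance().get_op_proto("no_such_op")
    except ValueError as e:
        print e.message  # Operator "no_such_op" has not been registered.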
diff --git a/python/paddle/v2/framework/layer_helper.py b/python/paddle/v2/framework/layer_helper.py
new file mode 100644
index 0000000000000000000000000000000000000000..26d3e04310b6b0415af31d1630575b32dce186d5
--- /dev/null
+++ b/python/paddle/v2/framework/layer_helper.py
@@ -0,0 +1,160 @@
+from paddle.v2.framework.framework import Variable, OpProtoHolder, g_program
+import paddle.v2.framework.core as core
+import copy
+import itertools
+
+
+def unique_name(prefix):
+    uid = core.unique_integer()  # unique during whole process.
+    return "_".join([prefix, str(uid)])
+
+
+class LayerHelper(object):
+    def __init__(self, layer_type, **kwargs):
+        self.kwargs = kwargs
+        self.layer_type = layer_type
+        name = self.kwargs.get('name', None)
+        if name is None:
+            self.kwargs['name'] = unique_name(self.layer_type)
+
+    @property
+    def name(self):
+        return self.kwargs['name']
+
+    @property
+    def program(self):
+        prog = self.kwargs.get('program', None)
+        if prog is None:
+            return g_program
+        else:
+            return prog
+
+    def append_op(self, *args, **kwargs):
+        return self.program.current_block().append_op(*args, **kwargs)
+
+    def multiple_input(self, input_param_name='input'):
+        inputs = self.kwargs.get(input_param_name, [])
+        type_error = TypeError(
+            "Input of {0} layer should be Variable or sequence of Variable".
+            format(self.layer_type))
+        if isinstance(inputs, Variable):
+            inputs = [inputs]
+        elif not isinstance(inputs, list) and not isinstance(inputs, tuple):
+            raise type_error
+        else:
+            for each in inputs:
+                if not isinstance(each, Variable):
+                    raise type_error
+        return inputs
+
+    def input(self, input_param_name='input'):
+        inputs = self.multiple_input(input_param_name)
+        if len(inputs) != 1:
+            raise ValueError("{0} layer only takes one input".format(self.layer_type))
+        return inputs[0]
+
+    @property
+    def param_attr(self):
+        default = {
+            'name': None,
+            'init_attr': {
+                'type': 'uniform_random',
+                'min': -1.0,
+                'max': 1.0
+            }
+        }
+        actual = self.kwargs.get('param_attr', None)
+        return actual if actual is not None else default
+
+    def bias_attr(self, size, dtype):
+        bias_attr = self.kwargs.get('bias_attr', False)
+        if bias_attr is None or bias_attr:
+            bias_attr = {
+                'name': None,
+                'init_attr': {
+                    'type': 'fill_constant',
+                    'value': 0.0,
+                    'shape': [size],
+                    'dataType': dtype
+                }
+            }
+        return bias_attr
+
+    def multiple_param_attr(self, length):
+        param_attr = self.param_attr
+        if isinstance(param_attr, dict):
+            param_attr = [param_attr]
+
+        if len(param_attr) != 1 and len(param_attr) != length:
+            raise ValueError("parameter number mismatch")
+        elif len(param_attr) == 1 and length != 1:
+            tmp = [None] * length
+            for i in xrange(length):
+                tmp[i] = copy.deepcopy(param_attr[0])
+            param_attr = tmp
+        return param_attr
+
+    def iter_inputs_and_params(self, input_param_name='input'):
+        inputs = self.multiple_input(input_param_name)
+        param_attrs = self.multiple_param_attr(len(inputs))
+        for ipt, param_attr in itertools.izip(inputs, param_attrs):
+            yield ipt, param_attr
+
+    def input_dtype(self, input_param_name='input'):
+        inputs = self.multiple_input(input_param_name)
+        dtype = None
+        for each in inputs:
+            if dtype is None:
+                dtype = each.data_type
+            elif dtype != each.data_type:
+                raise ValueError("Data Type mismatch")
+        return dtype
+
+    def create_parameter(self, attr, shape, dtype, suffix='w'):
+        if attr['name'] is None:
+            attr['name'] = unique_name(".".join([self.name, suffix]))
+        return self.program.global_block().create_parameter(
+            name=attr['name'],
+            dtype=dtype,
+            shape=shape,
+            initialize_attr=attr['init_attr'])
+
+    def create_tmp_variable(self, dtype):
+        return self.program.current_block().create_var(
+            name=unique_name(".".join([self.name, 'tmp'])), dtype=dtype)
+
+    def create_global_variable(self, *args, **kwargs):
+        return self.program.global_block().create_var(*args, **kwargs)
+
+    def append_bias_op(self, input_var):
+        bias_attr = self.bias_attr(
+            self.kwargs['size'], dtype=input_var.data_type)
+        if not bias_attr:
+            return input_var
+        b = self.create_parameter(
+            attr=bias_attr,
+            shape=[self.kwargs['size']],
+            dtype=input_var.data_type,
+            suffix='b')
+        tmp = self.create_tmp_variable(dtype=input_var.data_type)
+        self.append_op(
+            type='elementwise_add',
+            inputs={'X': [input_var],
+                    'Y': [b]},
+            outputs={'Out': [tmp]})
+        return tmp
+
+    def append_activation(self, input_var):
+        act = self.kwargs.get('act', None)
+        if act is None:
+            return input_var
+        if isinstance(act, basestring):
+            act = {'type': act}
+        tmp = self.create_tmp_variable(dtype=input_var.data_type)
+        act_type = act.pop('type')
+        self.append_op(
+            type=act_type,
+            inputs={"X": [input_var]},
+            outputs={"Y": [tmp]},
+            attrs=act)
+        return tmp
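Editor's note: the intended usage pattern is that a layer function builds a LayerHelper from its own locals(), then asks the helper for parameters, temporaries, bias, and activation, appending ops into the helper's program. A sketch of a hypothetical fc-like layer (the name my_fc_like_layer and its single 2-D input are illustrative, not part of this patch; the real fc_layer follows in layers.py below):

    from paddle.v2.framework.layer_helper import LayerHelper

    def my_fc_like_layer(input, size, param_attr=None, bias_attr=True,
                         act=None, name=None, program=None):
        # Hypothetical layer, assuming a single 2-D input [batch, dim].
        helper = LayerHelper('my_fc_like', **locals())
        dtype = helper.input_dtype()
        input_var = helper.input()
        w = helper.create_parameter(
            attr=helper.param_attr,
            shape=[input_var.shape[-1], size],
            dtype=dtype)
        tmp = helper.create_tmp_variable(dtype)
        helper.append_op(
            type='mul', inputs={'X': input_var,
                                'Y': w}, outputs={'Out': tmp})
        pre_activation = helper.append_bias_op(tmp)  # uses kwargs['size']
        return helper.append_activation(pre_activation)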
diff --git a/python/paddle/v2/framework/layers.py b/python/paddle/v2/framework/layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..44b587b116e2ebfc5329348027492a4ee27b04e5
--- /dev/null
+++ b/python/paddle/v2/framework/layers.py
@@ -0,0 +1,143 @@
+from paddle.v2.framework.layer_helper import LayerHelper
+import paddle.v2.framework.core as core
+from paddle.v2.framework.framework import OpProtoHolder, Variable
+import re
+
+__all__ = ['fc_layer', 'data_layer', 'cross_entropy']
+
+
+def fc_layer(input,
+             size,
+             param_attr=None,
+             bias_attr=True,
+             name=None,
+             act=None,
+             num_flatten_dims=1,
+             program=None):
+    # create helper
+    helper = LayerHelper('fc', **locals())
+
+    dtype = helper.input_dtype()
+
+    # mul
+    mul_results = []
+    for input_var, param_attr in helper.iter_inputs_and_params():
+        input_shape = input_var.shape
+        param_shape = list(input_shape[num_flatten_dims:]) + [size]
+        w = helper.create_parameter(
+            attr=param_attr, shape=param_shape, dtype=dtype)
+        tmp = helper.create_tmp_variable(dtype)
+        helper.append_op(
+            type="mul",
+            inputs={
+                "X": input_var,
+                "Y": w,
+            },
+            outputs={"Out": tmp},
+            attrs={'x_num_col_dims': num_flatten_dims})
+        mul_results.append(tmp)
+
+    # sum
+    if len(mul_results) == 1:
+        pre_bias = mul_results[0]
+    else:
+        pre_bias = helper.create_tmp_variable(dtype)
+        helper.append_op(
+            type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias})
+    # add bias
+    pre_activation = helper.append_bias_op(pre_bias)
+    # add activation
+    return helper.append_activation(pre_activation)
+
+
+def data_layer(name,
+               shape,
+               data_type='float32',
+               type=core.VarDesc.VarType.LOD_TENSOR,
+               program=None):
+    helper = LayerHelper('data', **locals())
+    shape = [-1] + shape  # append batch size as -1
+    return helper.create_global_variable(
+        name=name, shape=shape, dtype=data_type, type=type)
+
+
+def _convert_(name):
+    s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
+    return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
+
+
+def _create_op_func_(op_type):
+    op_proto = OpProtoHolder.instance().get_op_proto(op_type)
+    if len(op_proto.outputs) != 1:
+        raise ValueError(
+            "Only single-output operators can be automatically generated")
+
+    if op_proto.outputs[0].duplicable:
+        raise ValueError(
+            "Only non-duplicable ops can be automatically generated")
+
+    o_name = op_proto.outputs[0].name
+
+    def func(**kwargs):
+        helper = LayerHelper(op_type, **kwargs)
+        inputs = dict()
+        dtype = None
+        for ipt in op_proto.inputs:
+            name = _convert_(ipt.name)
+            val = kwargs.pop(name, [])
+            if not isinstance(val, list) and not isinstance(val, tuple):
+                val = [val]
+            for each in val:
+                if not isinstance(each, Variable):
+                    raise ValueError("input of {0} must be variable".format(
+                        op_type))
+
+                if dtype is None:
+                    dtype = each.data_type
+                elif dtype != each.data_type:
+                    raise ValueError(
+                        "inputs of operator {0} must have the same dtype".format(op_type))
+            inputs[ipt.name] = val
+
+        out = helper.create_tmp_variable(dtype=dtype)
+        helper.append_op(
+            type=op_type, inputs=inputs, outputs={o_name: [out]}, attrs=kwargs)
+        return out
+
+    func.__name__ = op_type
+    globals()[op_type] = func
+    global __all__
+    __all__.append(op_type)
+
+
+_create_op_func_('mean')
+
+
+def cross_entropy(input, label, **kwargs):
+    helper = LayerHelper('cross_entropy', **kwargs)
+    out = helper.create_tmp_variable(dtype=input.data_type)
+    helper.append_op(
+        type='cross_entropy',
+        inputs={'X': [input],
+                'Label': [label]},
+        outputs={'Y': [out]},
+        attrs=kwargs)
+    return out
+
+
+def square_error_cost(input, label, **kwargs):
+    helper = LayerHelper('square_error_cost', **kwargs)
+    minus_out = helper.create_tmp_variable(dtype=input.data_type)
+    helper.append_op(
+        type='elementwise_sub',
+        inputs={'X': [input],
+                'Y': [label]},
+        outputs={'Out': [minus_out]})
+
+    square_out = helper.create_tmp_variable(dtype=input.data_type)
+    helper.append_op(
+        type='pow',
+        inputs={'X': [minus_out]},
+        outputs={'Y': [square_out]},
+        attrs={'factor': 2.0})
+    return square_out
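Editor's note: for single-output operators, _create_op_func_ generates the layer wrapper automatically; the call _create_op_func_('mean') above produces a mean() function roughly equivalent to the hand-written sketch below (assuming the mean operator's input and output slots are named "X" and "Out"):

    from paddle.v2.framework.layer_helper import LayerHelper

    def mean_by_hand(x, **kwargs):
        # Roughly what the generated `mean` wrapper does for a single input.
        helper = LayerHelper('mean', **kwargs)
        out = helper.create_tmp_variable(dtype=x.data_type)
        helper.append_op(type='mean', inputs={'X': [x]}, outputs={'Out': [out]})
        return out

    # Usage mirrors the generated function, e.g. mean(x=cost, program=program).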
diff --git a/python/paddle/v2/framework/tests/test_layers.py b/python/paddle/v2/framework/tests/test_layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ef2591cca066788deed8a1c0f6443850251fb80
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_layers.py
@@ -0,0 +1,43 @@
+from paddle.v2.framework.layers import fc_layer, data_layer, cross_entropy, mean, square_error_cost
+from paddle.v2.framework.framework import Program, g_program
+import paddle.v2.framework.core as core
+import unittest
+
+
+class TestBook(unittest.TestCase):
+    def test_fit_a_line(self):
+        pd = core.ProgramDesc.__create_program_desc__()
+        program = Program(desc=pd)
+        x = data_layer(
+            name='x', shape=[13], data_type='float32', program=program)
+        y_predict = fc_layer(input=x, size=1, act=None, program=program)
+
+        y = data_layer(
+            name='y', shape=[1], data_type='float32', program=program)
+        cost = square_error_cost(input=y_predict, label=y, program=program)
+
+        avg_cost = mean(x=cost, program=program)
+        self.assertIsNotNone(avg_cost)
+        print str(program)
+
+    def test_recognize_digits_mlp(self):
+        pd = core.ProgramDesc.__create_program_desc__()
+        program = Program(desc=pd)
+
+        # Change g_program, so that the rest of the layers use `g_program`
+        images = data_layer(
+            name='pixel', shape=[784], data_type='float32', program=program)
+        label = data_layer(
+            name='label', shape=[1], data_type='int32', program=program)
+        hidden1 = fc_layer(input=images, size=128, act='relu', program=program)
+        hidden2 = fc_layer(input=hidden1, size=64, act='relu', program=program)
+        predict = fc_layer(
+            input=hidden2, size=10, act='softmax', program=program)
+        cost = cross_entropy(input=predict, label=label, program=program)
+        avg_cost = mean(x=cost, program=program)
+        self.assertIsNotNone(avg_cost)
+        print str(program)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_operator_desc.py b/python/paddle/v2/framework/tests/test_operator_desc.py
index dfe39c98f7f4fe266d5ec0c4a9ed14ab02e40e3a..af4e980b8ed6db6cb9b76de49d8dc0860f07ec80 100644
--- a/python/paddle/v2/framework/tests/test_operator_desc.py
+++ b/python/paddle/v2/framework/tests/test_operator_desc.py
@@ -16,7 +16,7 @@ class TestOperator(unittest.TestCase):
         try:
             block.append_op(type="no_such_op")
             self.assertFail()
-        except AssertionError as a_err:
+        except ValueError as a_err:
             self.assertEqual(a_err.message,
                              "Operator \"no_such_op\" has not been registered.")
