From c315339edb8b45ad9e456ca6ee2e7a02813f0749 Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Thu, 19 Oct 2017 16:12:41 -0700
Subject: [PATCH] Update

1. Add init_program to hold initializers
2. bug fix

---
 paddle/operators/uniform_random_op.cc      |  8 +--
 python/paddle/v2/framework/framework.py    | 32 ++++++------
 python/paddle/v2/framework/layer_helper.py | 22 ++++++---
 python/paddle/v2/framework/layers.py       | 12 +++--
 .../v2/framework/tests/test_fit_a_line.py  | 49 +++++++++++++------
 5 files changed, 76 insertions(+), 47 deletions(-)

diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc
index 612bdd70db2..f244ddc51fa 100644
--- a/paddle/operators/uniform_random_op.cc
+++ b/paddle/operators/uniform_random_op.cc
@@ -53,10 +53,10 @@ class UniformRandomOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(
         ctx->Attrs().Get<float>("min") < ctx->Attrs().Get<float>("max"),
         "uniform_random's min must less then max");
-    auto& dims = ctx->Attrs().Get<std::vector<int>>("dims");
+    auto& shape = ctx->Attrs().Get<std::vector<int>>("shape");
     std::vector<int64_t> temp;
-    temp.reserve(dims.size());
-    for (auto dim : dims) {
+    temp.reserve(shape.size());
+    for (auto dim : shape) {
       temp.push_back(static_cast<int64_t>(dim));
     }
     ctx->SetOutputDim("Out", framework::make_ddim(temp));
@@ -78,7 +78,7 @@ class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker {
     AddComment(R"DOC(Uniform random operator.
 Used to initialize tensor with uniform random generator.
 )DOC");
-    AddAttr<std::vector<int>>("dims", "the dimension of random tensor");
+    AddAttr<std::vector<int>>("shape", "the dimension of random tensor");
     AddAttr<float>("min", "Minimum value of uniform random").SetDefault(-1.0f);
     AddAttr<float>("max", "Maximun value of uniform random").SetDefault(1.0f);
     AddAttr<int>("seed",
diff --git a/python/paddle/v2/framework/framework.py b/python/paddle/v2/framework/framework.py
index 30ea5c99ddc..03a3dacf25c 100644
--- a/python/paddle/v2/framework/framework.py
+++ b/python/paddle/v2/framework/framework.py
@@ -343,6 +343,8 @@ class Block(object):
     def create_parameter(self, *args, **kwargs):
         global_block = self.program.global_block()
         param = Parameter(global_block, *args, **kwargs)
+        if 'init_attr' in kwargs:
+            self._prepend_initialize_ops_(param, kwargs['init_attr'])
         return param
 
     def append_op(self, *args, **kwargs):
@@ -401,6 +403,17 @@ class Block(object):
         for index in range(len(self.ops)):
             assert self.ops[index].desc == ops_in_cpp[index]
 
+    def _prepend_initialize_ops_(self, param, init_attr):
+        op_type = init_attr['type']
+        init_attr['shape'] = param.shape
+        init_attr['data_type'] = int(param.data_type)
+        op = self.prepend_op(
+            type=op_type,
+            inputs=None,
+            outputs={'Out': [param]},
+            attrs=init_attr)
+        param.op = op
+
 
 class Program(object):
     def __init__(self):
@@ -475,27 +488,10 @@ class Parameter(Variable):
         Variable.__init__(
             self, block, persistable=True, shape=shape, dtype=dtype, **kwargs)
         self.trainable = kwargs.get('trainable', True)
-        self.init_attr = kwargs.get('initialize_attr', {
-            'type': 'uniform_random',
-            'min': -1.0,
-            'max': 1.0
-        })
         self.optimize_attr = kwargs.get('optimize_attr', {'learning_rate': 1.0})
-        self._append_initialize_ops_()
-
-    def _append_initialize_ops_(self):
-        attr = self.init_attr
-        op_type = attr.pop('type', None)
-        block = self.block
-        assert isinstance(block, Block)
-        shape = self.shape
-        attr['dims'] = shape
-        attr['data_type'] = int(self.data_type)
-        op = block.prepend_op(
-            type=op_type, inputs=None, outputs={'Out': [self]}, attrs=attr)
-        self.op = op
 
 
 # program is a global instance.
 g_program = Program()
+g_init_program = Program()
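
The framework.py hunks above move initialization out of Parameter itself: a Parameter no longer appends its own initializer op in __init__; instead, Block.create_parameter prepends an initializer op only when an init_attr is passed, so those ops can collect in a separate init program and run exactly once. A standalone sketch of that prepend pattern, using simplified stand-ins rather than the real Block/Parameter classes (all names here are illustrative):

    # Minimal stand-in for Block illustrating _prepend_initialize_ops_:
    # the initializer op takes no inputs, writes the parameter as its
    # output, and is inserted at the front so it runs before other ops.
    class SketchBlock(object):
        def __init__(self):
            self.ops = []

        def prepend_op(self, **op):
            self.ops.insert(0, op)
            return op

        def create_parameter(self, name, shape, init_attr=None):
            if init_attr is not None:
                attrs = dict(init_attr, shape=shape)  # shape comes from the param
                self.prepend_op(type=attrs.pop('type'), inputs=None,
                                outputs={'Out': [name]}, attrs=attrs)
            return name

    block = SketchBlock()
    block.create_parameter('fc_0.w', [13, 1],
                           {'type': 'uniform_random', 'min': -1.0, 'max': 1.0})
    print(block.ops)  # one uniform_random op producing fc_0.w
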
diff --git a/python/paddle/v2/framework/layer_helper.py b/python/paddle/v2/framework/layer_helper.py
index ae7255ef9ac..0d831488088 100644
--- a/python/paddle/v2/framework/layer_helper.py
+++ b/python/paddle/v2/framework/layer_helper.py
@@ -1,4 +1,4 @@
-from paddle.v2.framework.framework import Variable, OpProtoHolder, g_program
+from paddle.v2.framework.framework import Variable, OpProtoHolder, g_program, g_init_program
 import paddle.v2.framework.core as core
 import copy
 import itertools
@@ -29,6 +29,14 @@ class LayerHelper(object):
         else:
             return prog
 
+    @property
+    def init_program(self):
+        prog = self.kwargs.get('init_program', None)
+        if prog is None:
+            return g_init_program
+        else:
+            return prog
+
     def append_op(self, *args, **kwargs):
         return self.program.current_block().append_op(*args, **kwargs)
 
@@ -73,9 +81,9 @@ class LayerHelper(object):
                 'name': None,
                 'init_attr': {
                     'type': 'fill_constant',
-                    'value': 0.0,
-                    'shape': shape,
-                    'dataType': dtype
+                    'value': 0.0
+                    #'shape': shape,
+                    #'dataType': dtype
                 }
             }
         return bias_attr
@@ -113,11 +121,13 @@ class LayerHelper(object):
     def create_parameter(self, attr, shape, dtype, suffix='w'):
         if attr['name'] is None:
             attr['name'] = unique_name(".".join([self.name, suffix]))
-        return self.program.global_block().create_parameter(
+        self.init_program.global_block().create_parameter(
             name=attr['name'],
             dtype=dtype,
             shape=shape,
-            initialize_attr=attr['init_attr'])
+            init_attr=attr['init_attr'])
+        return self.program.global_block().create_parameter(
+            name=attr['name'], dtype=dtype, shape=shape)
 
     def create_tmp_variable(self, dtype):
         return self.program.current_block().create_var(
diff --git a/python/paddle/v2/framework/layers.py b/python/paddle/v2/framework/layers.py
index 236427efcef..ac77aefa153 100644
--- a/python/paddle/v2/framework/layers.py
+++ b/python/paddle/v2/framework/layers.py
@@ -13,7 +13,8 @@ def fc(input,
        name=None,
        act=None,
        num_flatten_dims=1,
-       program=None):
+       program=None,
+       init_program=None):
     # create helper
     helper = LayerHelper('fc', **locals())
 
@@ -59,7 +60,8 @@ def data(name,
          data_type='float32',
          type=core.VarDesc.VarType.LOD_TENSOR,
          append_batch_size=True,
-         program=None):
+         program=None,
+         init_program=None):
     helper = LayerHelper('data', **locals())
     if append_batch_size:
         shape = [-1] + shape  # append batch size as -1
@@ -160,7 +162,8 @@ def conv2d(input,
            padding=None,
            bias_attr=None,
            param_attr=None,
-           program=None):
+           program=None,
+           init_program=None):
     helper = LayerHelper('conv2d', **locals())
     dtype = helper.input_dtype()
 
@@ -207,7 +210,8 @@ def pool2d(input,
            pool_stride=[1, 1],
            pool_padding=[0, 0],
            global_pooling=False,
-           program=None):
+           program=None,
+           init_program=None):
     if pool_type not in ["max", "avg"]:
         raise ValueError(
             "Unknown pool_type: '%s'. It can only be 'max' or 'avg'.",
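
The layer_helper.py hunk is the other half of the split: LayerHelper.create_parameter now registers each parameter twice, once in init_program's global block with an init_attr (triggering the initializer prepend shown earlier) and once in program's global block as a plain variable for the forward ops to read. A hedged sketch of that double registration, assuming this source tree is on the Python path (the name 'w' and the shapes are illustrative):

    from paddle.v2.framework.framework import Program

    program = Program()       # will hold forward/backward/optimize ops
    init_program = Program()  # will hold only parameter initializer ops

    # 1) the parameter plus its initializer op go into init_program ...
    init_program.global_block().create_parameter(
        name='w', dtype='float32', shape=[13, 1],
        init_attr={'type': 'uniform_random', 'min': -1.0, 'max': 1.0})
    # 2) ... while program gets a plain parameter of the same name.
    program.global_block().create_parameter(
        name='w', dtype='float32', shape=[13, 1])
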
diff --git a/python/paddle/v2/framework/tests/test_fit_a_line.py b/python/paddle/v2/framework/tests/test_fit_a_line.py
index a686ad2010a..f4e115b57dd 100644
--- a/python/paddle/v2/framework/tests/test_fit_a_line.py
+++ b/python/paddle/v2/framework/tests/test_fit_a_line.py
@@ -8,24 +8,35 @@ from paddle.v2.framework.executor import Executor
 
 import numpy as np
 
+init_program = Program()
 program = Program()
-x = layers.data(name='x', shape=[13], data_type='float32', program=program)
-y_predict = layers.fc(input=x, size=1, act=None, program=program)
+x = layers.data(
+    name='x',
+    shape=[13],
+    data_type='float32',
+    program=program,
+    init_program=init_program)
+y_predict = layers.fc(input=x,
+                      size=1,
+                      act=None,
+                      program=program,
+                      init_program=init_program)
 
-y = layers.data(name='y', shape=[1], data_type='float32', program=program)
+y = layers.data(
+    name='y',
+    shape=[1],
+    data_type='float32',
+    program=program,
+    init_program=init_program)
 
-cost = layers.square_error_cost(input=y_predict, label=y, program=program)
-avg_cost = layers.mean(x=cost, program=program)
+cost = layers.square_error_cost(
+    input=y_predict, label=y, program=program, init_program=init_program)
+avg_cost = layers.mean(x=cost, program=program, init_program=init_program)
 
-sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01)
+sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.005)
 opts = sgd_optimizer.minimize(avg_cost)
 
-print str(program)
-
-import pdb
-pdb.set_trace()
-
-BATCH_SIZE = 100
+BATCH_SIZE = 10
 
 train_reader = paddle.batch(
     paddle.reader.shuffle(
@@ -35,12 +46,15 @@ train_reader = paddle.batch(
 place = core.CPUPlace()
 exe = Executor(place)
 
-PASS_NUM = 200
+exe.run(init_program,
+        feed={},
+        fetch_list=[init_program.global_block().var('fc_0.w_1')])
+
+PASS_NUM = 10
 for pass_id in range(PASS_NUM):
     for data in train_reader():
         x_data = np.array(map(lambda x: x[0], data)).astype("float32")
         y_data = np.array(map(lambda x: x[1], data)).astype("float32")
-        #y_data = np.expand_dims(y_data, axis=1)
 
         tensor_x = core.LoDTensor()
         tensor_x.set(x_data, place)
@@ -50,6 +64,11 @@ for pass_id in range(PASS_NUM):
         outs = exe.run(program,
                        feed={'x': tensor_x,
                              'y': tensor_y},
-                       fetch_list=[avg_cost])
+                       fetch_list=[
+                           avg_cost, program.global_block().var('fc_0.w_1'),
+                           program.global_block().var('fc_0.w_1@GRAD')
+                       ])
 
         out = np.array(outs[0])
+        w = np.array(outs[1])
+        wg = np.array(outs[2])
         print out
-- 
GitLab
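
Since the optimizer is plain SGD with the learning rate lowered to 0.005 above, the weight and gradient fetched as 'fc_0.w_1' and 'fc_0.w_1@GRAD' should satisfy w_next = w - 0.005 * wg after each step. A tiny numpy sketch of that hand check, with stand-in arrays in place of the fetched tensors:

    import numpy as np

    w = np.array([[0.3]], dtype='float32')   # stand-in for fetched 'fc_0.w_1'
    wg = np.array([[1.2]], dtype='float32')  # stand-in for 'fc_0.w_1@GRAD'
    w_next = w - 0.005 * wg                  # the SGD update the optimizer applies
    print(w_next)                            # [[0.294]]
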