diff --git a/python/paddle/v2/fluid/__init__.py b/python/paddle/v2/fluid/__init__.py
index 9677c9568c6783921545364bca7b2c9c0041d823..c033b27beab52a979c78caeba68990c95b462c56 100644
--- a/python/paddle/v2/fluid/__init__.py
+++ b/python/paddle/v2/fluid/__init__.py
@@ -13,13 +13,14 @@ import nets
 import optimizer
 import backward
 import regularizer
+from param_attr import ParamAttr
 from core import LoDTensor, CPUPlace, GPUPlace
 
 Tensor = LoDTensor
 
 __all__ = framework.__all__ + executor.__all__ + [
     'io', 'initializer', 'layers', 'nets', 'optimizer', 'backward',
-    'regularizer', 'LoDTensor', 'CPUPlace', 'GPUPlace', 'Tensor'
+    'regularizer', 'LoDTensor', 'CPUPlace', 'GPUPlace', 'Tensor', 'ParamAttr'
 ]
diff --git a/python/paddle/v2/fluid/layer_helper.py b/python/paddle/v2/fluid/layer_helper.py
index 7762b0d88f3a62c3b919d64a6565bfc3951c4e25..5b384e5cf5df5e5abc7f0ef81ff11cd8a31cfa2d 100644
--- a/python/paddle/v2/fluid/layer_helper.py
+++ b/python/paddle/v2/fluid/layer_helper.py
@@ -1,8 +1,10 @@
 import copy
 import itertools
 
-from framework import Variable, default_main_program, default_startup_program, unique_name, dtype_is_floating
+from framework import Variable, default_main_program, default_startup_program, \
+    unique_name, dtype_is_floating
 from paddle.v2.fluid.initializer import Constant, Xavier
+from param_attr import ParamAttr
 
 
 class LayerHelper(object):
@@ -59,31 +61,15 @@ class LayerHelper(object):
 
     @property
     def param_attr(self):
-        default = {'name': None}
-        actual = self.kwargs.get('param_attr', None)
-        if actual is None:
-            actual = default
-        for default_field in default.keys():
-            if default_field not in actual:
-                actual[default_field] = default[default_field]
-        return actual
+        return ParamAttr.to_attr(self.kwargs.get('param_attr', None))
 
     @property
     def bias_attr(self):
-        default = {'name': None}
-        bias_attr = self.kwargs.get('bias_attr', None)
-        if bias_attr is None:
-            bias_attr = default
-
-        if isinstance(bias_attr, dict):
-            for default_field in default.keys():
-                if default_field not in bias_attr:
-                    bias_attr[default_field] = default[default_field]
-        return bias_attr
+        return ParamAttr.to_attr(self.kwargs.get('bias_attr', None))
 
     def multiple_param_attr(self, length):
         param_attr = self.param_attr
-        if isinstance(param_attr, dict):
+        if isinstance(param_attr, ParamAttr):
             param_attr = [param_attr]
 
         if len(param_attr) != 1 and len(param_attr) != length:
@@ -111,23 +97,30 @@ class LayerHelper(object):
                 raise ValueError("Data Type mismatch")
         return dtype
 
-    def create_parameter(self, attr, shape, dtype, suffix='w',
-                         initializer=None):
-        # Deepcopy the attr so that parameters can be shared in program
-        attr_copy = copy.deepcopy(attr)
-        if initializer is not None:
-            attr_copy['initializer'] = initializer
+    def create_parameter(self,
+                         attr,
+                         shape,
+                         dtype,
+                         is_bias=False,
+                         default_initializer=None):
+        # Fill a default initializer and a unique name into attr below
+        assert isinstance(attr, ParamAttr)
+        suffix = 'b' if is_bias else 'w'
+
+        if default_initializer is None:
+            if is_bias:
+                attr.set_default_bias_initializer()
+            else:
+                attr.set_default_param_initializer()
         else:
-            attr_copy['initializer'] = self._get_default_initializer(dtype)
-        if attr_copy['name'] is None:
-            attr_copy['name'] = unique_name(".".join([self.name, suffix]))
+            attr.set_default_initializer(default_initializer)
+        if attr.name is None:
+            attr.name = unique_name(".".join([self.name, suffix]))
+
         self.startup_program.global_block().create_parameter(
-            dtype=dtype, shape=shape, **attr_copy)
+            dtype=dtype, shape=shape, **attr.to_kwargs(with_initializer=True))
         return self.main_program.global_block().create_parameter(
-            name=attr_copy['name'],
-            dtype=dtype,
-            shape=shape,
-            trainable=attr_copy.get('trainable', True))
+            dtype=dtype, shape=shape, **attr.to_kwargs())
 
     def create_tmp_variable(self, dtype):
         return self.main_program.current_block().create_var(
@@ -152,11 +145,7 @@ class LayerHelper(object):
             persistable=True,
             initializer=initializer)
 
-    def append_bias_op(self,
-                       input_var,
-                       bias_initializer,
-                       dim_start=1,
-                       dim_end=None):
+    def append_bias_op(self, input_var, dim_start=1, dim_end=None):
         """
         Append bias operator and return its output. If the user does not set
         bias_attr, append_bias_op will return input_var
@@ -176,11 +165,7 @@ class LayerHelper(object):
             return input_var
 
         b = self.create_parameter(
-            attr=bias_attr,
-            shape=size,
-            dtype=input_var.dtype,
-            suffix='b',
-            initializer=bias_initializer)
+            attr=bias_attr, shape=size, dtype=input_var.dtype, is_bias=True)
         tmp = self.create_tmp_variable(dtype=input_var.dtype)
         self.append_op(
             type='elementwise_add',
diff --git a/python/paddle/v2/fluid/layers.py b/python/paddle/v2/fluid/layers.py
index 6adfac3a32c7c8ecf035ba3a8f757a0efb6f9b68..9dcc11d21618ec12ac6a2112ed8e307ab028f6c0 100644
--- a/python/paddle/v2/fluid/layers.py
+++ b/python/paddle/v2/fluid/layers.py
@@ -5,6 +5,7 @@ from initializer import Constant, Normal, Xavier, Initializer
 from paddle.v2.fluid.layer_helper import LayerHelper, unique_name
 import re
 import cStringIO
+from param_attr import ParamAttr
 
 __all__ = [
     'fc', 'data', 'cross_entropy', 'conv2d', 'pool2d', 'embedding', 'concat',
@@ -17,9 +18,7 @@ def fc(input,
        size,
        num_flatten_dims=1,
        param_attr=None,
-       param_initializer=None,
        bias_attr=None,
-       bias_initializer=None,
        act=None,
        name=None,
        main_program=None,
@@ -54,23 +53,10 @@ def fc(input,
     to the LayerHelper constructor.
 
     """
-
-    def _get_default_param_initializer():
-        return Xavier()
-
-    def _get_default_bias_initializer():
-        return Constant()
-
     helper = LayerHelper('fc', **locals())
     dtype = helper.input_dtype()
 
-    if param_initializer is None:
-        param_initializer = _get_default_param_initializer()
-
-    if bias_initializer is None:
-        bias_initializer = _get_default_bias_initializer()
-
     mul_results = []
     for input_var, param_attr in helper.iter_inputs_and_params():
         input_shape = input_var.shape
@@ -78,10 +64,7 @@ def fc(input,
             reduce(lambda a, b: a * b, input_shape[num_flatten_dims:], 1)
         ] + [size]
         w = helper.create_parameter(
-            attr=param_attr,
-            initializer=param_initializer,
-            shape=param_shape,
-            dtype=dtype)
+            attr=param_attr, shape=param_shape, dtype=dtype, is_bias=False)
         tmp = helper.create_tmp_variable(dtype)
         helper.append_op(
             type="mul",
@@ -102,7 +85,7 @@ def fc(input,
         helper.append_op(
             type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias})
     # add bias
-    pre_activation = helper.append_bias_op(pre_bias, bias_initializer)
+    pre_activation = helper.append_bias_op(pre_bias)
     # add activation
     return helper.append_activation(pre_activation)
 
@@ -110,7 +93,6 @@ def fc(input,
 def embedding(input,
               size,
               is_sparse=False,
-              param_initializer=None,
               param_attr=None,
               dtype='float32',
               main_program=None,
@@ -119,8 +101,8 @@ def embedding(input,
               startup_program=None):
     """
     Embedding Layer.
 
     Args:
         input: The input to the function
         size: The size of the layer
-        is_sparse: A flag that decleares whether the input is sparse
+        is_sparse: A flag that declares whether the input is sparse
@@ -136,15 +119,9 @@
     """
 
-    def _get_default_param_initializer():
-        return Xavier()
-
     helper = LayerHelper('embedding', **locals())
     w = helper.create_parameter(
-        attr=helper.param_attr,
-        shape=size,
-        dtype=dtype,
-        initializer=param_initializer or _get_default_param_initializer())
+        attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False)
     tmp = helper.create_tmp_variable(dtype)
     helper.append_op(
         type='lookup_table',
@@ -176,7 +153,7 @@ def dynamic_lstm(input,
     if not use_peepholes:
         bias_size[1] = 4 * size
     bias = helper.create_parameter(
-        attr=helper.bias_attr, shape=bias_size, dtype=dtype, suffix='b')
+        attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True)
 
     hidden = helper.create_tmp_variable(dtype)
     cell = helper.create_tmp_variable(dtype)
@@ -471,19 +448,14 @@ def sums(input, out=None, main_program=None, startup_program=None):
 def linear_chain_crf(input,
                      label,
                      param_attr=None,
-                     param_initializer=None,
                      main_program=None,
                      startup_program=None):
-    def _get_default_param_initializer():
-        return Xavier()
-
     helper = LayerHelper('linear_chain_crf', **locals())
     size = input.shape[1]
     transition = helper.create_parameter(
         attr=helper.param_attr,
         shape=[size + 2, size],
-        dtype=helper.input_dtype(),
-        initializer=param_initializer or _get_default_param_initializer())
+        dtype=helper.input_dtype())
     alpha = helper.create_tmp_variable(dtype=helper.input_dtype())
     emission_exps = helper.create_tmp_variable(dtype=helper.input_dtype())
     transition_exps = helper.create_tmp_variable(dtype=helper.input_dtype())
@@ -646,9 +618,7 @@ def sequence_conv(input,
                   filter_stride=1,
                   padding=None,
                   bias_attr=None,
-                  bias_initializer=None,
                   param_attr=None,
-                  param_initializer=None,
                   act=None,
                   main_program=None,
                   startup_program=None):
@@ -658,30 +628,15 @@ def sequence_conv(input,
     in the input parameters to the function.
     """
 
-    def _get_default_bias_initializer():
-        return Constant()
-
-    def _get_default_param_initializer():
-        return Xavier()
-
     # FIXME(dzh) : want to unify the argument of python layer
     # function. So we ignore some unnecessary attributes,
     # such as padding_trainable and context_start.
 
     helper = LayerHelper('sequence_conv', **locals())
     dtype = helper.input_dtype()
-
-    if param_initializer is None:
-        param_initializer = _get_default_param_initializer()
-    if bias_initializer is None:
-        bias_initializer = _get_default_bias_initializer()
-
     filter_shape = [filter_size * input.shape[1], num_filters]
     filter = helper.create_parameter(
-        attr=helper.param_attr,
-        shape=filter_shape,
-        dtype=dtype,
-        initializer=param_initializer)
+        attr=helper.param_attr, shape=filter_shape, dtype=dtype)
 
     pre_bias = helper.create_tmp_variable(dtype)
     helper.append_op(
@@ -696,7 +651,7 @@ def sequence_conv(input,
             'contextStart': -int(filter_size / 2),
             'contextLength': filter_size
         })
-    pre_act = helper.append_bias_op(pre_bias, bias_initializer)
+    pre_act = helper.append_bias_op(pre_bias)
 
     return helper.append_activation(pre_act)
 
@@ -707,9 +662,7 @@ def conv2d(input,
            padding=None,
            groups=None,
            param_attr=None,
-           param_initializer=None,
            bias_attr=None,
-           bias_initializer=None,
            act=None,
            name=None,
            main_program=None,
@@ -722,13 +675,6 @@ def conv2d(input,
     conv-2d output, if mentioned in the input parameters.
""" - def _get_default_bias_initializer(): - return Constant() - - def _get_default_param_initializer(filter_size, num_channels): - std = (2.0 / (filter_size[0]**2 * num_channels))**0.5 - return Normal(0.0, std, 0) - helper = LayerHelper('conv2d', **locals()) dtype = helper.input_dtype() @@ -750,17 +696,16 @@ def conv2d(input, input_shape = input.shape filter_shape = [num_filters, num_filter_channels] + filter_size - if param_initializer is None: - param_initializer = _get_default_param_initializer(filter_size, - num_channels) - if bias_initializer is None: - bias_initializer = _get_default_bias_initializer() + def _get_default_param_initializer(): + std = (2.0 / (filter_size[0]**2 * num_channels))**0.5 + return Normal(0.0, std, 0) filter = helper.create_parameter( attr=helper.param_attr, shape=filter_shape, dtype=dtype, - initializer=param_initializer) + default_initializer=_get_default_param_initializer()) + pre_bias = helper.create_tmp_variable(dtype) helper.append_op( @@ -774,8 +719,7 @@ def conv2d(input, 'paddings': padding, 'groups': groups}) - pre_act = helper.append_bias_op( - pre_bias, bias_initializer, dim_start=1, dim_end=2) + pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2) return helper.append_activation(pre_act) @@ -876,12 +820,10 @@ def batch_norm(input, attr=helper.param_attr, shape=param_shape, dtype=dtype, - initializer=Constant(1.0)) + default_initializer=Constant(1.0)) + bias = helper.create_parameter( - attr=helper.param_attr, - shape=param_shape, - dtype=dtype, - initializer=Constant(0.0)) + attr=helper.param_attr, shape=param_shape, dtype=dtype, is_bias=True) mean = helper.create_global_variable( dtype=input.dtype, shape=param_shape, persistable=True) @@ -1356,7 +1298,7 @@ def lod_rank_table(x, level=0, main_program=None): def max_sequence_len(rank_table, main_program=None): """ - This function creates an operator to calculate the length of + This function creates an operator to calculate the length of max seqence through input rank_table(should be a lod_rank_table) """ helper = LayerHelper("max_seqence_len", **locals()) @@ -1594,35 +1536,33 @@ def conv2d_transpose(input, padding=None, stride=None, param_attr=None, - param_initializer=None, main_program=None, startup_program=None): """ The transpose of conv2d layer. - + This layer is also known as deconvolution layer. - + Args: input(Variable): The input image with [N, C, H, W] format. num_filters(int): The number of filter. It is as same as the output image channel. output_size(int|tuple|None): The output image size. If output size is a - tuple, it must contain two integers, (image_H, image_W). This + tuple, it must contain two integers, (image_H, image_W). This parameter only works when filter_size is None. filter_size(int|tuple|None): The filter size. If filter_size is a tuple, it must contain two integers, (filter_size_H, filter_size_W). Otherwise, the filter will be a square. None if use output size to calculate filter_size padding(int|tuple): The padding size. If padding is a tuple, it must - contain two integers, (padding_H, padding_W). Otherwise, the + contain two integers, (padding_H, padding_W). Otherwise, the padding_H = padding_W = padding. stride(int|tuple): The stride size. If stride is a tuple, it must contain two integers, (stride_H, stride_W). Otherwise, the stride_H = stride_W = stride. param_attr: Parameter Attribute. - param_initializer(Initializer): Parameter Initializer. 
         main_program(Program): the main program
-        startup_program(Program): the startup program 
+        startup_program(Program): the startup program
 
     Returns:
         Variable: Output image.
@@ -1663,10 +1603,7 @@ def conv2d_transpose(input,
     filter_shape = [input_channel, num_filters] + filter_size
 
     img_filter = helper.create_parameter(
-        dtype=input.dtype,
-        shape=filter_shape,
-        attr=helper.param_attr,
-        initializer=param_initializer)
+        dtype=input.dtype, shape=filter_shape, attr=helper.param_attr)
 
     out = helper.create_tmp_variable(dtype=input.dtype)
     helper.append_op(
@@ -1675,6 +1612,7 @@ def conv2d_transpose(input,
                 'Filter': [img_filter]},
         outputs={'Output': out},
         attrs=op_attr)
+
     return out
diff --git a/python/paddle/v2/fluid/param_attr.py b/python/paddle/v2/fluid/param_attr.py
new file mode 100644
index 0000000000000000000000000000000000000000..86088fdd7ce17b8b7a9688dc838e69b2aa754013
--- /dev/null
+++ b/python/paddle/v2/fluid/param_attr.py
@@ -0,0 +1,61 @@
+from initializer import Initializer, Xavier, Constant
+from regularizer import WeightDecayRegularizer
+
+
+class ParamAttr(object):
+    def __init__(self,
+                 name=None,
+                 initializer=None,
+                 learning_rate=1.0,
+                 regularizer=None,
+                 trainable=True):
+        self.name = name
+        self.initializer = initializer
+        self.learning_rate = learning_rate
+        self.regularizer = regularizer
+        self.trainable = trainable
+
+    def set_default_initializer(self, initializer):
+        if initializer is None:
+            if self.initializer is None:
+                raise ValueError("ParamAttr.initializer is not set")
+            return
+
+        if self.initializer is not None:
+            return
+
+        self.initializer = initializer
+
+    def set_default_param_initializer(self):
+        self.set_default_initializer(Xavier())
+
+    def set_default_bias_initializer(self):
+        self.set_default_initializer(Constant(0.0))
+
+    @staticmethod
+    def to_attr(arg):
+        if arg is None:
+            return ParamAttr()
+        elif isinstance(arg, ParamAttr):
+            return arg
+        elif isinstance(arg, str) or isinstance(arg, unicode):
+            return ParamAttr(name=arg)
+        elif isinstance(arg, Initializer):
+            return ParamAttr(initializer=arg)
+        elif isinstance(arg, WeightDecayRegularizer):
+            return ParamAttr(regularizer=arg)
+        elif isinstance(arg, bool):
+            return ParamAttr.to_attr(None) if arg else False
+        else:
+            raise TypeError("Could not cast {0} to ParamAttr".format(
+                type(arg)))
+
+    def to_kwargs(self, with_initializer=False):
+        kwargs = {
+            'name': self.name,
+            'learning_rate': self.learning_rate,
+            'regularizer': self.regularizer,
+            'trainable': self.trainable
+        }
+        if with_initializer:
+            kwargs['initializer'] = self.initializer
+        return kwargs
diff --git a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
index 93987a2b80dc9ca304a708d4799bc38b448a68c4..bcd6f4d6bc66fd01406332bd1d6d7a5c4b0ddb5a 100644
--- a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
+++ b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
@@ -44,7 +44,7 @@ def db_lstm():
         size=[pred_len, word_dim],
         dtype='float32',
         is_sparse=IS_SPARSE,
-        param_attr={'name': 'vemb'})
+        param_attr='vemb')
 
     mark_embedding = fluid.layers.embedding(
         input=mark,
@@ -57,8 +57,8 @@ def db_lstm():
         fluid.layers.embedding(
             size=[word_dict_len, word_dim],
             input=x,
-            param_attr={'name': embedding_name,
-                        'trainable': False}) for x in word_input
+            param_attr=fluid.ParamAttr(
+                name=embedding_name, trainable=False)) for x in word_input
     ]
     emb_layers.append(predicate_embedding)
     emb_layers.append(mark_embedding)
@@ -125,8 +125,8 @@ def main():
     crf_cost = fluid.layers.linear_chain_crf(
         input=feature_out,
         label=target,
-        param_attr={"name": 'crfw',
-                    "learning_rate": mix_hidden_lr})
+        param_attr=fluid.ParamAttr(
+            name='crfw', learning_rate=mix_hidden_lr))
     avg_cost = fluid.layers.mean(x=crf_cost)
     # TODO(qiao)
     # 1. add crf_decode_layer and evaluator
diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
index 8ca45134dc01ec21e720ca46c8ad020128aa6e04..fa18965aac667c0829b9e6ee56ece585564f9060 100644
--- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
+++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
@@ -6,24 +6,21 @@ import paddle.v2.fluid as fluid
 BATCH_SIZE = 128
 image = fluid.layers.data(name='x', shape=[784], dtype='float32')
 
-param_attr = {
-    'name': None,
-    'regularization': fluid.regularizer.L2Decay(0.0005 * BATCH_SIZE)
-}
+regularizer = fluid.regularizer.L2Decay(0.0005 * BATCH_SIZE)
 
 hidden1 = fluid.layers.fc(input=image,
                           size=128,
                           act='relu',
-                          param_attr=param_attr)
+                          param_attr=regularizer)
 hidden2 = fluid.layers.fc(input=hidden1,
                           size=64,
                           act='relu',
-                          param_attr=param_attr)
+                          param_attr=regularizer)
 predict = fluid.layers.fc(input=hidden2,
                           size=10,
                           act='softmax',
-                          param_attr=param_attr)
+                          param_attr=regularizer)
 
 label = fluid.layers.data(name='y', shape=[1], dtype='int64')
diff --git a/python/paddle/v2/fluid/tests/book/test_recommender_system.py b/python/paddle/v2/fluid/tests/book/test_recommender_system.py
index f8dc1518579d5a9d7a8d0498dcc5fd8a6d1692c4..db91ca4f9c7d17fb51fc5d65a0464e976d98523c 100644
--- a/python/paddle/v2/fluid/tests/book/test_recommender_system.py
+++ b/python/paddle/v2/fluid/tests/book/test_recommender_system.py
@@ -24,7 +24,7 @@ def get_usr_combined_features():
         input=uid,
         dtype='float32',
         size=[USR_DICT_SIZE, 32],
-        param_attr={'name': 'user_table'},
+        param_attr='user_table',
         is_sparse=IS_SPARSE)
 
     usr_fc = layers.fc(input=usr_emb, size=32)
@@ -36,7 +36,7 @@ def get_usr_combined_features():
     usr_gender_emb = layers.embedding(
         input=usr_gender_id,
         size=[USR_GENDER_DICT_SIZE, 16],
-        param_attr={'name': 'gender_table'},
+        param_attr='gender_table',
         is_sparse=IS_SPARSE)
 
     usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
@@ -48,7 +48,7 @@ def get_usr_combined_features():
         input=usr_age_id,
         size=[USR_AGE_DICT_SIZE, 16],
         is_sparse=IS_SPARSE,
-        param_attr={'name': 'age_table'})
+        param_attr='age_table')
 
     usr_age_fc = layers.fc(input=usr_age_emb, size=16)
 
@@ -58,7 +58,7 @@ def get_usr_combined_features():
     usr_job_emb = layers.embedding(
         input=usr_job_id,
         size=[USR_JOB_DICT_SIZE, 16],
-        param_attr={'name': 'job_table'},
+        param_attr='job_table',
         is_sparse=IS_SPARSE)
 
     usr_job_fc = layers.fc(input=usr_job_emb, size=16)
@@ -81,7 +81,7 @@ def get_mov_combined_features():
         input=mov_id,
         dtype='float32',
         size=[MOV_DICT_SIZE, 32],
-        param_attr={'name': 'movie_table'},
+        param_attr='movie_table',
         is_sparse=IS_SPARSE)
 
     mov_fc = layers.fc(input=mov_emb, size=32)
diff --git a/python/paddle/v2/fluid/tests/book/test_word2vec.py b/python/paddle/v2/fluid/tests/book/test_word2vec.py
index b0cd1a518cd1be60474df126470573a5a5b81b70..92d3629d42613e896e93e0149928b50940058169 100644
--- a/python/paddle/v2/fluid/tests/book/test_word2vec.py
+++ b/python/paddle/v2/fluid/tests/book/test_word2vec.py
@@ -23,25 +23,25 @@ embed_first = fluid.layers.embedding(
     size=[dict_size, EMBED_SIZE],
     dtype='float32',
     is_sparse=IS_SPARSE,
-    param_attr={'name': 'shared_w'})
+    param_attr='shared_w')
 embed_second = fluid.layers.embedding(
     input=second_word,
     size=[dict_size, EMBED_SIZE],
     dtype='float32',
     is_sparse=IS_SPARSE,
-    param_attr={'name': 'shared_w'})
+    param_attr='shared_w')
 embed_third = fluid.layers.embedding(
     input=third_word,
     size=[dict_size, EMBED_SIZE],
     dtype='float32',
     is_sparse=IS_SPARSE,
-    param_attr={'name': 'shared_w'})
+    param_attr='shared_w')
 embed_forth = fluid.layers.embedding(
     input=forth_word,
     size=[dict_size, EMBED_SIZE],
     dtype='float32',
     is_sparse=IS_SPARSE,
-    param_attr={'name': 'shared_w'})
+    param_attr='shared_w')
 
 concat_embed = fluid.layers.concat(
     input=[embed_first, embed_second, embed_third, embed_forth], axis=1)
diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py
index 62b2a0f9a11aa20e170fd108083abe04caedc4f3..b6906be60b8ffb7c7afc220ad4f40c6f60a0b112 100644
--- a/python/paddle/v2/fluid/tests/test_layers.py
+++ b/python/paddle/v2/fluid/tests/test_layers.py
@@ -132,26 +132,26 @@ class TestBook(unittest.TestCase):
             input=first_word,
             size=[dict_size, embed_size],
             dtype='float32',
-            param_attr={'name': 'shared_w'},
+            param_attr='shared_w',
             main_program=program)
         embed_second = layers.embedding(
             input=second_word,
             size=[dict_size, embed_size],
             dtype='float32',
-            param_attr={'name': 'shared_w'},
+            param_attr='shared_w',
             main_program=program)
         embed_third = layers.embedding(
             input=third_word,
             size=[dict_size, embed_size],
             dtype='float32',
-            param_attr={'name': 'shared_w'},
+            param_attr='shared_w',
             main_program=program)
         embed_forth = layers.embedding(
             input=forth_word,
             size=[dict_size, embed_size],
             dtype='float32',
-            param_attr={'name': 'shared_w'},
+            param_attr='shared_w',
             main_program=program)
 
         concat_embed = layers.concat(
diff --git a/python/paddle/v2/fluid/tests/test_recurrent_op.py b/python/paddle/v2/fluid/tests/test_recurrent_op.py
index 84548847f76c6315da000e1b3d062deafe55a05e..36e0c84c0b8e7d40aa56d75c8904a38694881be4 100644
--- a/python/paddle/v2/fluid/tests/test_recurrent_op.py
+++ b/python/paddle/v2/fluid/tests/test_recurrent_op.py
@@ -271,12 +271,12 @@ class RecurrentOpTest2(RecurrentOpTest1):
 
             temp_l = layers.fc(input=x_t,
                                size=self.input_dim,
-                               param_attr={'name': 'W'},
+                               param_attr='W',
                                bias_attr=False,
                                **self.p_info)
             temp_r = layers.fc(input=h_pre,
                                size=self.input_dim,
-                               param_attr={'name': 'U'},
+                               param_attr='U',
                                bias_attr=False,
                                **self.p_info)
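
Usage sketch for reviewers: the forms that param_attr accepts after this change, each promoted to a full ParamAttr by ParamAttr.to_attr. The input shape, layer sizes, and parameter names below are illustrative assumptions, not code from this patch.

import paddle.v2.fluid as fluid

x = fluid.layers.data(name='x', shape=[13], dtype='float32')

# A plain string becomes ParamAttr(name='shared_fc.w'); two layers passing
# the same string share one parameter, as in the word2vec tests above.
h1 = fluid.layers.fc(input=x, size=64, act='relu', param_attr='shared_fc.w')

# A WeightDecayRegularizer (or an Initializer) becomes a ParamAttr carrying
# just that field, as in test_recognize_digits_mlp.py above.
h2 = fluid.layers.fc(input=h1,
                     size=64,
                     act='relu',
                     param_attr=fluid.regularizer.L2Decay(1e-4))

# The explicit form spells out every field; bias_attr=False disables the
# bias term (append_bias_op then returns its input unchanged).
out = fluid.layers.fc(input=h2,
                      size=10,
                      act='softmax',
                      param_attr=fluid.ParamAttr(
                          name='out.w',
                          initializer=fluid.initializer.Xavier(),
                          learning_rate=2.0,
                          regularizer=fluid.regularizer.L2Decay(1e-4),
                          trainable=True),
                      bias_attr=False)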
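A second sketch shows the defaulting logic that LayerHelper.create_parameter now drives; this is Python 2 (matching the unicode/cStringIO usage in the patch) and assumes this tree is on PYTHONPATH.

from paddle.v2.fluid.param_attr import ParamAttr

w_attr = ParamAttr.to_attr('fc_0.w')    # name only; initializer still unset
w_attr.set_default_param_initializer()  # fills in Xavier(), keeps the name
b_attr = ParamAttr.to_attr(None)        # empty ParamAttr()
b_attr.set_default_bias_initializer()   # fills in Constant(0.0)

# to_kwargs(with_initializer=True) is what LayerHelper forwards to the
# startup program's create_parameter; the plain form goes to the main program.
print w_attr.to_kwargs(with_initializer=True)
print b_attr.to_kwargs()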