Unverified commit 1b6dcc2f, authored by Yu Yang, committed by GitHub

Feature/param attr (#5996)

* Make param_attr a strongly typed class

Fix #5819
Parent 399d3a2d
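In short, this change replaces dict-based parameter attributes (plus separate *_initializer arguments) with a single strongly typed ParamAttr object. A minimal before/after sketch based on the diff below; the parameter name 'fc_w' and the sizes are illustrative, and fluid.ParamAttr / fluid.initializer follow the module paths this commit exports:

    import paddle.v2.fluid as fluid

    image = fluid.layers.data(name='x', shape=[784], dtype='float32')

    # Before: param_attr={'name': 'fc_w'}, param_initializer=Xavier()
    # After: one object carries name, initializer, learning rate, regularizer, trainable.
    hidden = fluid.layers.fc(
        input=image,
        size=128,
        act='relu',
        param_attr=fluid.ParamAttr(name='fc_w',
                                   initializer=fluid.initializer.Xavier()))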
...@@ -13,13 +13,14 @@ import nets ...@@ -13,13 +13,14 @@ import nets
import optimizer import optimizer
import backward import backward
import regularizer import regularizer
from param_attr import ParamAttr
from core import LoDTensor, CPUPlace, GPUPlace from core import LoDTensor, CPUPlace, GPUPlace
Tensor = LoDTensor Tensor = LoDTensor
__all__ = framework.__all__ + executor.__all__ + [ __all__ = framework.__all__ + executor.__all__ + [
'io', 'initializer', 'layers', 'nets', 'optimizer', 'backward', 'io', 'initializer', 'layers', 'nets', 'optimizer', 'backward',
'regularizer', 'LoDTensor', 'CPUPlace', 'GPUPlace', 'Tensor' 'regularizer', 'LoDTensor', 'CPUPlace', 'GPUPlace', 'Tensor', 'ParamAttr'
] ]
......
import copy import copy
import itertools import itertools
from framework import Variable, default_main_program, default_startup_program, unique_name, dtype_is_floating from framework import Variable, default_main_program, default_startup_program, \
unique_name, dtype_is_floating
from paddle.v2.fluid.initializer import Constant, Xavier from paddle.v2.fluid.initializer import Constant, Xavier
from param_attr import ParamAttr
class LayerHelper(object): class LayerHelper(object):
...@@ -59,31 +61,15 @@ class LayerHelper(object): ...@@ -59,31 +61,15 @@ class LayerHelper(object):
@property @property
def param_attr(self): def param_attr(self):
default = {'name': None} return ParamAttr.to_attr(self.kwargs.get('param_attr', None))
actual = self.kwargs.get('param_attr', None)
if actual is None:
actual = default
for default_field in default.keys():
if default_field not in actual:
actual[default_field] = default[default_field]
return actual
@property @property
def bias_attr(self): def bias_attr(self):
default = {'name': None} return ParamAttr.to_attr(self.kwargs.get('bias_attr', None))
bias_attr = self.kwargs.get('bias_attr', None)
if bias_attr is None:
bias_attr = default
if isinstance(bias_attr, dict):
for default_field in default.keys():
if default_field not in bias_attr:
bias_attr[default_field] = default[default_field]
return bias_attr
def multiple_param_attr(self, length): def multiple_param_attr(self, length):
param_attr = self.param_attr param_attr = self.param_attr
if isinstance(param_attr, dict): if isinstance(param_attr, ParamAttr):
param_attr = [param_attr] param_attr = [param_attr]
if len(param_attr) != 1 and len(param_attr) != length: if len(param_attr) != 1 and len(param_attr) != length:
...@@ -111,23 +97,30 @@ class LayerHelper(object): ...@@ -111,23 +97,30 @@ class LayerHelper(object):
raise ValueError("Data Type mismatch") raise ValueError("Data Type mismatch")
return dtype return dtype
def create_parameter(self, attr, shape, dtype, suffix='w', def create_parameter(self,
initializer=None): attr,
shape,
dtype,
is_bias=False,
default_initializer=None):
# Deepcopy the attr so that parameters can be shared in program # Deepcopy the attr so that parameters can be shared in program
attr_copy = copy.deepcopy(attr) assert isinstance(attr, ParamAttr)
if initializer is not None: suffix = 'b' if is_bias else 'w'
attr_copy['initializer'] = initializer
if default_initializer is None:
if is_bias:
attr.set_default_bias_initializer()
else:
attr.set_default_param_initializer()
else: else:
attr_copy['initializer'] = self._get_default_initializer(dtype) attr.set_default_initializer(default_initializer)
if attr_copy['name'] is None: if attr.name is None:
attr_copy['name'] = unique_name(".".join([self.name, suffix])) attr.name = unique_name(".".join([self.name, suffix]))
self.startup_program.global_block().create_parameter( self.startup_program.global_block().create_parameter(
dtype=dtype, shape=shape, **attr_copy) dtype=dtype, shape=shape, **attr.to_kwargs(with_initializer=True))
return self.main_program.global_block().create_parameter( return self.main_program.global_block().create_parameter(
name=attr_copy['name'], dtype=dtype, shape=shape, **attr.to_kwargs())
dtype=dtype,
shape=shape,
trainable=attr_copy.get('trainable', True))
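For layer authors, create_parameter now takes a ParamAttr plus is_bias and an optional default_initializer instead of the old suffix/initializer arguments. A hedged sketch of a hypothetical toy layer written against the new helper API; the layer itself is not part of this commit, and the op wiring is kept to the minimum that the fc layer below uses:

    import paddle.v2.fluid as fluid
    from paddle.v2.fluid.layer_helper import LayerHelper

    def scale_layer(input, size, param_attr=None, bias_attr=None, act=None,
                    main_program=None, startup_program=None):
        # Hypothetical toy layer: projects `input` with one weight and a bias.
        helper = LayerHelper('scale_layer', **locals())
        dtype = helper.input_dtype()
        w = helper.create_parameter(
            attr=helper.param_attr,          # normalized to ParamAttr by the property above
            shape=[input.shape[-1], size],
            dtype=dtype,
            is_bias=False)                   # suffix 'w', Xavier() default initializer
        tmp = helper.create_tmp_variable(dtype)
        helper.append_op(type='mul',
                         inputs={'X': input, 'Y': w},
                         outputs={'Out': tmp})
        # Creates the bias through helper.bias_attr with is_bias=True
        # (suffix 'b', Constant(0.0) default); no-op if bias_attr is False.
        pre_act = helper.append_bias_op(tmp)
        return helper.append_activation(pre_act)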
def create_tmp_variable(self, dtype): def create_tmp_variable(self, dtype):
return self.main_program.current_block().create_var( return self.main_program.current_block().create_var(
...@@ -152,11 +145,7 @@ class LayerHelper(object): ...@@ -152,11 +145,7 @@ class LayerHelper(object):
persistable=True, persistable=True,
initializer=initializer) initializer=initializer)
def append_bias_op(self, def append_bias_op(self, input_var, dim_start=1, dim_end=None):
input_var,
bias_initializer,
dim_start=1,
dim_end=None):
""" """
Append bias operator and return its output. If the user does not set Append bias operator and return its output. If the user does not set
bias_attr, append_bias_op will return input_var bias_attr, append_bias_op will return input_var
...@@ -176,11 +165,7 @@ class LayerHelper(object): ...@@ -176,11 +165,7 @@ class LayerHelper(object):
return input_var return input_var
b = self.create_parameter( b = self.create_parameter(
attr=bias_attr, attr=bias_attr, shape=size, dtype=input_var.dtype, is_bias=True)
shape=size,
dtype=input_var.dtype,
suffix='b',
initializer=bias_initializer)
tmp = self.create_tmp_variable(dtype=input_var.dtype) tmp = self.create_tmp_variable(dtype=input_var.dtype)
self.append_op( self.append_op(
type='elementwise_add', type='elementwise_add',
......
...@@ -5,6 +5,7 @@ from initializer import Constant, Normal, Xavier, Initializer ...@@ -5,6 +5,7 @@ from initializer import Constant, Normal, Xavier, Initializer
from paddle.v2.fluid.layer_helper import LayerHelper, unique_name from paddle.v2.fluid.layer_helper import LayerHelper, unique_name
import re import re
import cStringIO import cStringIO
from param_attr import ParamAttr
__all__ = [ __all__ = [
'fc', 'data', 'cross_entropy', 'conv2d', 'pool2d', 'embedding', 'concat', 'fc', 'data', 'cross_entropy', 'conv2d', 'pool2d', 'embedding', 'concat',
...@@ -17,9 +18,7 @@ def fc(input, ...@@ -17,9 +18,7 @@ def fc(input,
size, size,
num_flatten_dims=1, num_flatten_dims=1,
param_attr=None, param_attr=None,
param_initializer=None,
bias_attr=None, bias_attr=None,
bias_initializer=None,
act=None, act=None,
name=None, name=None,
main_program=None, main_program=None,
...@@ -54,23 +53,10 @@ def fc(input, ...@@ -54,23 +53,10 @@ def fc(input,
to the LayerHelper constructor. to the LayerHelper constructor.
""" """
def _get_default_param_initializer():
return Xavier()
def _get_default_bias_initializer():
return Constant()
helper = LayerHelper('fc', **locals()) helper = LayerHelper('fc', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
if param_initializer is None:
param_initializer = _get_default_param_initializer()
if bias_initializer is None:
bias_initializer = _get_default_bias_initializer()
mul_results = [] mul_results = []
for input_var, param_attr in helper.iter_inputs_and_params(): for input_var, param_attr in helper.iter_inputs_and_params():
input_shape = input_var.shape input_shape = input_var.shape
...@@ -78,10 +64,7 @@ def fc(input, ...@@ -78,10 +64,7 @@ def fc(input,
reduce(lambda a, b: a * b, input_shape[num_flatten_dims:], 1) reduce(lambda a, b: a * b, input_shape[num_flatten_dims:], 1)
] + [size] ] + [size]
w = helper.create_parameter( w = helper.create_parameter(
attr=param_attr, attr=param_attr, shape=param_shape, dtype=dtype, is_bias=False)
initializer=param_initializer,
shape=param_shape,
dtype=dtype)
tmp = helper.create_tmp_variable(dtype) tmp = helper.create_tmp_variable(dtype)
helper.append_op( helper.append_op(
type="mul", type="mul",
...@@ -102,7 +85,7 @@ def fc(input, ...@@ -102,7 +85,7 @@ def fc(input,
helper.append_op( helper.append_op(
type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias}) type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias})
# add bias # add bias
pre_activation = helper.append_bias_op(pre_bias, bias_initializer) pre_activation = helper.append_bias_op(pre_bias)
# add activation # add activation
return helper.append_activation(pre_activation) return helper.append_activation(pre_activation)
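With param_initializer and bias_initializer gone from fc, everything about the weights now travels through param_attr/bias_attr. A hedged usage sketch; the names, learning rate, and decay value are illustrative:

    import paddle.v2.fluid as fluid

    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.fc(
        input=x,
        size=1,
        param_attr=fluid.ParamAttr(
            name='fc_w',
            initializer=fluid.initializer.Normal(0.0, 0.02, 0),
            learning_rate=0.5,
            regularizer=fluid.regularizer.L2Decay(1e-4)),
        bias_attr=fluid.ParamAttr(name='fc_b'))  # bias falls back to the Constant() default

When fc receives a list of inputs, param_attr may also be a list with one ParamAttr per input weight, which is what multiple_param_attr above checks.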
...@@ -110,7 +93,6 @@ def fc(input, ...@@ -110,7 +93,6 @@ def fc(input,
def embedding(input, def embedding(input,
size, size,
is_sparse=False, is_sparse=False,
param_initializer=None,
param_attr=None, param_attr=None,
dtype='float32', dtype='float32',
main_program=None, main_program=None,
...@@ -119,6 +101,7 @@ def embedding(input, ...@@ -119,6 +101,7 @@ def embedding(input,
Embedding Layer. Embedding Layer.
Args: Args:
param_initializer:
input: The input to the function input: The input to the function
size: The size of the layer size: The size of the layer
is_sparse: A flag that decleares whether the input is sparse is_sparse: A flag that decleares whether the input is sparse
...@@ -136,15 +119,9 @@ def embedding(input, ...@@ -136,15 +119,9 @@ def embedding(input,
""" """
def _get_default_param_initializer():
return Xavier()
helper = LayerHelper('embedding', **locals()) helper = LayerHelper('embedding', **locals())
w = helper.create_parameter( w = helper.create_parameter(
attr=helper.param_attr, attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False)
shape=size,
dtype=dtype,
initializer=param_initializer or _get_default_param_initializer())
tmp = helper.create_tmp_variable(dtype) tmp = helper.create_tmp_variable(dtype)
helper.append_op( helper.append_op(
type='lookup_table', type='lookup_table',
...@@ -176,7 +153,7 @@ def dynamic_lstm(input, ...@@ -176,7 +153,7 @@ def dynamic_lstm(input,
if not use_peepholes: if not use_peepholes:
bias_size[1] = 4 * size bias_size[1] = 4 * size
bias = helper.create_parameter( bias = helper.create_parameter(
attr=helper.bias_attr, shape=bias_size, dtype=dtype, suffix='b') attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True)
hidden = helper.create_tmp_variable(dtype) hidden = helper.create_tmp_variable(dtype)
cell = helper.create_tmp_variable(dtype) cell = helper.create_tmp_variable(dtype)
...@@ -471,19 +448,14 @@ def sums(input, out=None, main_program=None, startup_program=None): ...@@ -471,19 +448,14 @@ def sums(input, out=None, main_program=None, startup_program=None):
def linear_chain_crf(input, def linear_chain_crf(input,
label, label,
param_attr=None, param_attr=None,
param_initializer=None,
main_program=None, main_program=None,
startup_program=None): startup_program=None):
def _get_default_param_initializer():
return Xavier()
helper = LayerHelper('linear_chain_crf', **locals()) helper = LayerHelper('linear_chain_crf', **locals())
size = input.shape[1] size = input.shape[1]
transition = helper.create_parameter( transition = helper.create_parameter(
attr=helper.param_attr, attr=helper.param_attr,
shape=[size + 2, size], shape=[size + 2, size],
dtype=helper.input_dtype(), dtype=helper.input_dtype())
initializer=param_initializer or _get_default_param_initializer())
alpha = helper.create_tmp_variable(dtype=helper.input_dtype()) alpha = helper.create_tmp_variable(dtype=helper.input_dtype())
emission_exps = helper.create_tmp_variable(dtype=helper.input_dtype()) emission_exps = helper.create_tmp_variable(dtype=helper.input_dtype())
transition_exps = helper.create_tmp_variable(dtype=helper.input_dtype()) transition_exps = helper.create_tmp_variable(dtype=helper.input_dtype())
...@@ -646,9 +618,7 @@ def sequence_conv(input, ...@@ -646,9 +618,7 @@ def sequence_conv(input,
filter_stride=1, filter_stride=1,
padding=None, padding=None,
bias_attr=None, bias_attr=None,
bias_initializer=None,
param_attr=None, param_attr=None,
param_initializer=None,
act=None, act=None,
main_program=None, main_program=None,
startup_program=None): startup_program=None):
...@@ -658,30 +628,15 @@ def sequence_conv(input, ...@@ -658,30 +628,15 @@ def sequence_conv(input,
in the input parameters to the function. in the input parameters to the function.
""" """
def _get_default_bias_initializer():
return Constant()
def _get_default_param_initializer():
return Xavier()
# FIXME(dzh) : want to unify the argument of python layer # FIXME(dzh) : want to unify the argument of python layer
# function. So we ignore some unecessary attributes. # function. So we ignore some unecessary attributes.
# such as, padding_trainable, context_start. # such as, padding_trainable, context_start.
helper = LayerHelper('sequence_conv', **locals()) helper = LayerHelper('sequence_conv', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
if param_initializer is None:
param_initializer = _get_default_param_initializer()
if bias_initializer is None:
bias_initializer = _get_default_bias_initializer()
filter_shape = [filter_size * input.shape[1], num_filters] filter_shape = [filter_size * input.shape[1], num_filters]
filter = helper.create_parameter( filter = helper.create_parameter(
attr=helper.param_attr, attr=helper.param_attr, shape=filter_shape, dtype=dtype)
shape=filter_shape,
dtype=dtype,
initializer=param_initializer)
pre_bias = helper.create_tmp_variable(dtype) pre_bias = helper.create_tmp_variable(dtype)
helper.append_op( helper.append_op(
...@@ -696,7 +651,7 @@ def sequence_conv(input, ...@@ -696,7 +651,7 @@ def sequence_conv(input,
'contextStart': -int(filter_size / 2), 'contextStart': -int(filter_size / 2),
'contextLength': filter_size 'contextLength': filter_size
}) })
pre_act = helper.append_bias_op(pre_bias, bias_initializer) pre_act = helper.append_bias_op(pre_bias)
return helper.append_activation(pre_act) return helper.append_activation(pre_act)
...@@ -707,9 +662,7 @@ def conv2d(input, ...@@ -707,9 +662,7 @@ def conv2d(input,
padding=None, padding=None,
groups=None, groups=None,
param_attr=None, param_attr=None,
param_initializer=None,
bias_attr=None, bias_attr=None,
bias_initializer=None,
act=None, act=None,
name=None, name=None,
main_program=None, main_program=None,
...@@ -722,13 +675,6 @@ def conv2d(input, ...@@ -722,13 +675,6 @@ def conv2d(input,
conv-2d output, if mentioned in the input parameters. conv-2d output, if mentioned in the input parameters.
""" """
def _get_default_bias_initializer():
return Constant()
def _get_default_param_initializer(filter_size, num_channels):
std = (2.0 / (filter_size[0]**2 * num_channels))**0.5
return Normal(0.0, std, 0)
helper = LayerHelper('conv2d', **locals()) helper = LayerHelper('conv2d', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -750,17 +696,16 @@ def conv2d(input, ...@@ -750,17 +696,16 @@ def conv2d(input,
input_shape = input.shape input_shape = input.shape
filter_shape = [num_filters, num_filter_channels] + filter_size filter_shape = [num_filters, num_filter_channels] + filter_size
if param_initializer is None: def _get_default_param_initializer():
param_initializer = _get_default_param_initializer(filter_size, std = (2.0 / (filter_size[0]**2 * num_channels))**0.5
num_channels) return Normal(0.0, std, 0)
if bias_initializer is None:
bias_initializer = _get_default_bias_initializer()
filter = helper.create_parameter( filter = helper.create_parameter(
attr=helper.param_attr, attr=helper.param_attr,
shape=filter_shape, shape=filter_shape,
dtype=dtype, dtype=dtype,
initializer=param_initializer) default_initializer=_get_default_param_initializer())
pre_bias = helper.create_tmp_variable(dtype) pre_bias = helper.create_tmp_variable(dtype)
helper.append_op( helper.append_op(
...@@ -774,8 +719,7 @@ def conv2d(input, ...@@ -774,8 +719,7 @@ def conv2d(input,
'paddings': padding, 'paddings': padding,
'groups': groups}) 'groups': groups})
pre_act = helper.append_bias_op( pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)
pre_bias, bias_initializer, dim_start=1, dim_end=2)
return helper.append_activation(pre_act) return helper.append_activation(pre_act)
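Note that default_initializer only fills in when the user's ParamAttr carries no initializer of its own (see ParamAttr.set_default_initializer below). A hedged conv2d sketch; the input shape, filter count, and variable names are illustrative:

    import paddle.v2.fluid as fluid

    img = fluid.layers.data(name='img', shape=[3, 32, 32], dtype='float32')

    # conv2d keeps its Normal(0, std) default for the filter, but an initializer
    # set on param_attr takes precedence over that default.
    conv = fluid.layers.conv2d(
        input=img,
        num_filters=32,
        filter_size=[3, 3],
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Xavier()),
        act='relu')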
...@@ -876,12 +820,10 @@ def batch_norm(input, ...@@ -876,12 +820,10 @@ def batch_norm(input,
attr=helper.param_attr, attr=helper.param_attr,
shape=param_shape, shape=param_shape,
dtype=dtype, dtype=dtype,
initializer=Constant(1.0)) default_initializer=Constant(1.0))
bias = helper.create_parameter( bias = helper.create_parameter(
attr=helper.param_attr, attr=helper.param_attr, shape=param_shape, dtype=dtype, is_bias=True)
shape=param_shape,
dtype=dtype,
initializer=Constant(0.0))
mean = helper.create_global_variable( mean = helper.create_global_variable(
dtype=input.dtype, shape=param_shape, persistable=True) dtype=input.dtype, shape=param_shape, persistable=True)
...@@ -1356,7 +1298,7 @@ def lod_rank_table(x, level=0, main_program=None): ...@@ -1356,7 +1298,7 @@ def lod_rank_table(x, level=0, main_program=None):
def max_sequence_len(rank_table, main_program=None): def max_sequence_len(rank_table, main_program=None):
""" """
This function creates an operator to calculate the length of This function creates an operator to calculate the length of
max seqence through input rank_table(should be a lod_rank_table) max seqence through input rank_table(should be a lod_rank_table)
""" """
helper = LayerHelper("max_seqence_len", **locals()) helper = LayerHelper("max_seqence_len", **locals())
...@@ -1594,35 +1536,33 @@ def conv2d_transpose(input, ...@@ -1594,35 +1536,33 @@ def conv2d_transpose(input,
padding=None, padding=None,
stride=None, stride=None,
param_attr=None, param_attr=None,
param_initializer=None,
main_program=None, main_program=None,
startup_program=None): startup_program=None):
""" """
The transpose of conv2d layer. The transpose of conv2d layer.
This layer is also known as deconvolution layer. This layer is also known as deconvolution layer.
Args: Args:
input(Variable): The input image with [N, C, H, W] format. input(Variable): The input image with [N, C, H, W] format.
num_filters(int): The number of filter. It is as same as the output num_filters(int): The number of filter. It is as same as the output
image channel. image channel.
output_size(int|tuple|None): The output image size. If output size is a output_size(int|tuple|None): The output image size. If output size is a
tuple, it must contain two integers, (image_H, image_W). This tuple, it must contain two integers, (image_H, image_W). This
parameter only works when filter_size is None. parameter only works when filter_size is None.
filter_size(int|tuple|None): The filter size. If filter_size is a tuple, filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_H, filter_size_W). it must contain two integers, (filter_size_H, filter_size_W).
Otherwise, the filter will be a square. None if use output size to Otherwise, the filter will be a square. None if use output size to
calculate filter_size calculate filter_size
padding(int|tuple): The padding size. If padding is a tuple, it must padding(int|tuple): The padding size. If padding is a tuple, it must
contain two integers, (padding_H, padding_W). Otherwise, the contain two integers, (padding_H, padding_W). Otherwise, the
padding_H = padding_W = padding. padding_H = padding_W = padding.
stride(int|tuple): The stride size. If stride is a tuple, it must stride(int|tuple): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. stride_H = stride_W = stride.
param_attr: Parameter Attribute. param_attr: Parameter Attribute.
param_initializer(Initializer): Parameter Initializer. Default is Xavier
main_program(Program): the main program main_program(Program): the main program
startup_program(Program): the startup program startup_program(Program): the startup program
Returns: Returns:
Variable: Output image. Variable: Output image.
...@@ -1663,10 +1603,7 @@ def conv2d_transpose(input, ...@@ -1663,10 +1603,7 @@ def conv2d_transpose(input,
filter_shape = [input_channel, num_filters] + filter_size filter_shape = [input_channel, num_filters] + filter_size
img_filter = helper.create_parameter( img_filter = helper.create_parameter(
dtype=input.dtype, dtype=input.dtype, shape=filter_shape, attr=helper.param_attr)
shape=filter_shape,
attr=helper.param_attr,
initializer=param_initializer)
out = helper.create_tmp_variable(dtype=input.dtype) out = helper.create_tmp_variable(dtype=input.dtype)
helper.append_op( helper.append_op(
...@@ -1675,6 +1612,7 @@ def conv2d_transpose(input, ...@@ -1675,6 +1612,7 @@ def conv2d_transpose(input,
'Filter': [img_filter]}, 'Filter': [img_filter]},
outputs={'Output': out}, outputs={'Output': out},
attrs=op_attr) attrs=op_attr)
return out return out
......
from initializer import Initializer, Xavier, Constant
from regularizer import WeightDecayRegularizer
class ParamAttr(object):
def __init__(self,
name=None,
initializer=None,
learning_rate=1.0,
regularizer=None,
trainable=True):
self.name = name
self.initializer = initializer
self.learning_rate = learning_rate
self.regularizer = regularizer
self.trainable = trainable
def set_default_initializer(self, initializer):
if initializer is None:
if self.initializer is None:
raise ValueError("ParamAttr.initializer is not set")
return
if self.initializer is not None:
return
self.initializer = initializer
def set_default_param_initializer(self):
self.set_default_initializer(Xavier())
def set_default_bias_initializer(self):
self.set_default_initializer(Constant(0.0))
@staticmethod
def to_attr(arg):
if arg is None:
return ParamAttr()
elif isinstance(arg, ParamAttr):
return arg
elif isinstance(arg, str) or isinstance(arg, unicode):
return ParamAttr(name=arg)
elif isinstance(arg, Initializer):
return ParamAttr(initializer=arg)
elif isinstance(arg, WeightDecayRegularizer):
return ParamAttr(regularizer=arg)
elif isinstance(arg, bool):
return ParamAttr.to_attr(None) if arg else False
else:
raise TypeError("{0} cast to ParamAttr".format(type(arg)))
def to_kwargs(self, with_initializer=False):
kwargs = {
'name': self.name,
'learning_rate': self.learning_rate,
'regularizer': self.regularizer,
'trainable': self.trainable
}
if with_initializer:
kwargs['initializer'] = self.initializer
return kwargs
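ParamAttr.to_attr is what lets every layer accept the convenient shorthand forms seen in the examples below. A quick summary sketch of the accepted inputs, based on the class above (argument values are illustrative):

    import paddle.v2.fluid as fluid

    fluid.ParamAttr.to_attr(None)                             # -> ParamAttr() with defaults
    fluid.ParamAttr.to_attr('crfw')                           # str -> ParamAttr(name='crfw')
    fluid.ParamAttr.to_attr(fluid.initializer.Xavier())       # Initializer -> ParamAttr(initializer=...)
    fluid.ParamAttr.to_attr(fluid.regularizer.L2Decay(1e-4))  # Regularizer -> ParamAttr(regularizer=...)
    fluid.ParamAttr.to_attr(fluid.ParamAttr(name='w'))        # passed through unchanged
    fluid.ParamAttr.to_attr(False)                            # False -> False (e.g. bias_attr=False disables the bias)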
...@@ -44,7 +44,7 @@ def db_lstm(): ...@@ -44,7 +44,7 @@ def db_lstm():
size=[pred_len, word_dim], size=[pred_len, word_dim],
dtype='float32', dtype='float32',
is_sparse=IS_SPARSE, is_sparse=IS_SPARSE,
param_attr={'name': 'vemb'}) param_attr='vemb')
mark_embedding = fluid.layers.embedding( mark_embedding = fluid.layers.embedding(
input=mark, input=mark,
...@@ -57,8 +57,8 @@ def db_lstm(): ...@@ -57,8 +57,8 @@ def db_lstm():
fluid.layers.embedding( fluid.layers.embedding(
size=[word_dict_len, word_dim], size=[word_dict_len, word_dim],
input=x, input=x,
param_attr={'name': embedding_name, param_attr=fluid.ParamAttr(
'trainable': False}) for x in word_input name=embedding_name, trainable=False)) for x in word_input
] ]
emb_layers.append(predicate_embedding) emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding) emb_layers.append(mark_embedding)
...@@ -125,8 +125,8 @@ def main(): ...@@ -125,8 +125,8 @@ def main():
crf_cost = fluid.layers.linear_chain_crf( crf_cost = fluid.layers.linear_chain_crf(
input=feature_out, input=feature_out,
label=target, label=target,
param_attr={"name": 'crfw', param_attr=fluid.ParamAttr(
"learning_rate": mix_hidden_lr}) name='crfw', learning_rate=mix_hidden_lr))
avg_cost = fluid.layers.mean(x=crf_cost) avg_cost = fluid.layers.mean(x=crf_cost)
# TODO(qiao) # TODO(qiao)
# 1. add crf_decode_layer and evaluator # 1. add crf_decode_layer and evaluator
......
...@@ -6,24 +6,21 @@ import paddle.v2.fluid as fluid ...@@ -6,24 +6,21 @@ import paddle.v2.fluid as fluid
BATCH_SIZE = 128 BATCH_SIZE = 128
image = fluid.layers.data(name='x', shape=[784], dtype='float32') image = fluid.layers.data(name='x', shape=[784], dtype='float32')
param_attr = { regularizer = fluid.regularizer.L2Decay(0.0005 * BATCH_SIZE)
'name': None,
'regularization': fluid.regularizer.L2Decay(0.0005 * BATCH_SIZE)
}
hidden1 = fluid.layers.fc(input=image, hidden1 = fluid.layers.fc(input=image,
size=128, size=128,
act='relu', act='relu',
param_attr=param_attr) param_attr=regularizer)
hidden2 = fluid.layers.fc(input=hidden1, hidden2 = fluid.layers.fc(input=hidden1,
size=64, size=64,
act='relu', act='relu',
param_attr=param_attr) param_attr=regularizer)
predict = fluid.layers.fc(input=hidden2, predict = fluid.layers.fc(input=hidden2,
size=10, size=10,
act='softmax', act='softmax',
param_attr=param_attr) param_attr=regularizer)
label = fluid.layers.data(name='y', shape=[1], dtype='int64') label = fluid.layers.data(name='y', shape=[1], dtype='int64')
......
...@@ -24,7 +24,7 @@ def get_usr_combined_features(): ...@@ -24,7 +24,7 @@ def get_usr_combined_features():
input=uid, input=uid,
dtype='float32', dtype='float32',
size=[USR_DICT_SIZE, 32], size=[USR_DICT_SIZE, 32],
param_attr={'name': 'user_table'}, param_attr='user_table',
is_sparse=IS_SPARSE) is_sparse=IS_SPARSE)
usr_fc = layers.fc(input=usr_emb, size=32) usr_fc = layers.fc(input=usr_emb, size=32)
...@@ -36,7 +36,7 @@ def get_usr_combined_features(): ...@@ -36,7 +36,7 @@ def get_usr_combined_features():
usr_gender_emb = layers.embedding( usr_gender_emb = layers.embedding(
input=usr_gender_id, input=usr_gender_id,
size=[USR_GENDER_DICT_SIZE, 16], size=[USR_GENDER_DICT_SIZE, 16],
param_attr={'name': 'gender_table'}, param_attr='gender_table',
is_sparse=IS_SPARSE) is_sparse=IS_SPARSE)
usr_gender_fc = layers.fc(input=usr_gender_emb, size=16) usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
...@@ -48,7 +48,7 @@ def get_usr_combined_features(): ...@@ -48,7 +48,7 @@ def get_usr_combined_features():
input=usr_age_id, input=usr_age_id,
size=[USR_AGE_DICT_SIZE, 16], size=[USR_AGE_DICT_SIZE, 16],
is_sparse=IS_SPARSE, is_sparse=IS_SPARSE,
param_attr={'name': 'age_table'}) param_attr='age_table')
usr_age_fc = layers.fc(input=usr_age_emb, size=16) usr_age_fc = layers.fc(input=usr_age_emb, size=16)
...@@ -58,7 +58,7 @@ def get_usr_combined_features(): ...@@ -58,7 +58,7 @@ def get_usr_combined_features():
usr_job_emb = layers.embedding( usr_job_emb = layers.embedding(
input=usr_job_id, input=usr_job_id,
size=[USR_JOB_DICT_SIZE, 16], size=[USR_JOB_DICT_SIZE, 16],
param_attr={'name': 'job_table'}, param_attr='job_table',
is_sparse=IS_SPARSE) is_sparse=IS_SPARSE)
usr_job_fc = layers.fc(input=usr_job_emb, size=16) usr_job_fc = layers.fc(input=usr_job_emb, size=16)
...@@ -81,7 +81,7 @@ def get_mov_combined_features(): ...@@ -81,7 +81,7 @@ def get_mov_combined_features():
input=mov_id, input=mov_id,
dtype='float32', dtype='float32',
size=[MOV_DICT_SIZE, 32], size=[MOV_DICT_SIZE, 32],
param_attr={'name': 'movie_table'}, param_attr='movie_table',
is_sparse=IS_SPARSE) is_sparse=IS_SPARSE)
mov_fc = layers.fc(input=mov_emb, size=32) mov_fc = layers.fc(input=mov_emb, size=32)
......
...@@ -23,25 +23,25 @@ embed_first = fluid.layers.embedding( ...@@ -23,25 +23,25 @@ embed_first = fluid.layers.embedding(
size=[dict_size, EMBED_SIZE], size=[dict_size, EMBED_SIZE],
dtype='float32', dtype='float32',
is_sparse=IS_SPARSE, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}) param_attr='shared_w')
embed_second = fluid.layers.embedding( embed_second = fluid.layers.embedding(
input=second_word, input=second_word,
size=[dict_size, EMBED_SIZE], size=[dict_size, EMBED_SIZE],
dtype='float32', dtype='float32',
is_sparse=IS_SPARSE, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}) param_attr='shared_w')
embed_third = fluid.layers.embedding( embed_third = fluid.layers.embedding(
input=third_word, input=third_word,
size=[dict_size, EMBED_SIZE], size=[dict_size, EMBED_SIZE],
dtype='float32', dtype='float32',
is_sparse=IS_SPARSE, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}) param_attr='shared_w')
embed_forth = fluid.layers.embedding( embed_forth = fluid.layers.embedding(
input=forth_word, input=forth_word,
size=[dict_size, EMBED_SIZE], size=[dict_size, EMBED_SIZE],
dtype='float32', dtype='float32',
is_sparse=IS_SPARSE, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}) param_attr='shared_w')
concat_embed = fluid.layers.concat( concat_embed = fluid.layers.concat(
input=[embed_first, embed_second, embed_third, embed_forth], axis=1) input=[embed_first, embed_second, embed_third, embed_forth], axis=1)
......
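The repeated param_attr='shared_w' above is the string shorthand from ParamAttr.to_attr: all four lookups name the same parameter, so they share one embedding table. A condensed, hedged sketch; first_word, second_word, dict_size, EMBED_SIZE, and IS_SPARSE are assumed from the surrounding script:

    # Two lookups that share a single table named 'shared_w'.
    emb_a = fluid.layers.embedding(
        input=first_word, size=[dict_size, EMBED_SIZE], dtype='float32',
        is_sparse=IS_SPARSE, param_attr='shared_w')
    emb_b = fluid.layers.embedding(
        input=second_word, size=[dict_size, EMBED_SIZE], dtype='float32',
        is_sparse=IS_SPARSE, param_attr='shared_w')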
...@@ -132,26 +132,26 @@ class TestBook(unittest.TestCase): ...@@ -132,26 +132,26 @@ class TestBook(unittest.TestCase):
input=first_word, input=first_word,
size=[dict_size, embed_size], size=[dict_size, embed_size],
dtype='float32', dtype='float32',
param_attr={'name': 'shared_w'}, param_attr='shared_w',
main_program=program) main_program=program)
embed_second = layers.embedding( embed_second = layers.embedding(
input=second_word, input=second_word,
size=[dict_size, embed_size], size=[dict_size, embed_size],
dtype='float32', dtype='float32',
param_attr={'name': 'shared_w'}, param_attr='shared_w',
main_program=program) main_program=program)
embed_third = layers.embedding( embed_third = layers.embedding(
input=third_word, input=third_word,
size=[dict_size, embed_size], size=[dict_size, embed_size],
dtype='float32', dtype='float32',
param_attr={'name': 'shared_w'}, param_attr='shared_w',
main_program=program) main_program=program)
embed_forth = layers.embedding( embed_forth = layers.embedding(
input=forth_word, input=forth_word,
size=[dict_size, embed_size], size=[dict_size, embed_size],
dtype='float32', dtype='float32',
param_attr={'name': 'shared_w'}, param_attr='shared_w',
main_program=program) main_program=program)
concat_embed = layers.concat( concat_embed = layers.concat(
......
...@@ -271,12 +271,12 @@ class RecurrentOpTest2(RecurrentOpTest1): ...@@ -271,12 +271,12 @@ class RecurrentOpTest2(RecurrentOpTest1):
temp_l = layers.fc(input=x_t, temp_l = layers.fc(input=x_t,
size=self.input_dim, size=self.input_dim,
param_attr={'name': 'W'}, param_attr='W',
bias_attr=False, bias_attr=False,
**self.p_info) **self.p_info)
temp_r = layers.fc(input=h_pre, temp_r = layers.fc(input=h_pre,
size=self.input_dim, size=self.input_dim,
param_attr={'name': 'U'}, param_attr='U',
bias_attr=False, bias_attr=False,
**self.p_info) **self.p_info)
......