提交 c4ba8705 编写于 作者: W wanghaoshuang

Merge branch 'develop' of https://github.com/paddlepaddle/models into ce_ocr

# Download the pretrained AISHELL model archive, verify its MD5 checksum,
# and unpack it into the current directory.
url=http://deep-asr-data.gz.bcebos.com/aishell_pretrained_model.tar.gz
md5=7b51bde64e884f43901b7a3461ccbfa3
# -c continues a partially downloaded archive instead of restarting it.
wget -c "$url"
echo "Checking md5 sum ..."
md5sum_tmp=$(md5sum aishell_pretrained_model.tar.gz | cut -d ' ' -f1)
# Quote both operands so an empty checksum (e.g. the archive is missing)
# is compared as a string instead of breaking the test expression.
if [ "$md5sum_tmp" != "$md5" ]; then
    # Both parts of the message belong to one echo; the line continuation
    # keeps the second part from being executed as a command.
    echo "Md5sum check failed, please remove and redownload" \
         "aishell_pretrained_model.tar.gz."
    exit 1
fi
tar xvf aishell_pretrained_model.tar.gz
...@@ -16,10 +16,18 @@ def parse_args(): ...@@ -16,10 +16,18 @@ def parse_args():
default='cer', default='cer',
choices=['cer', 'wer'], choices=['cer', 'wer'],
help="Error rate type. (default: %(default)s)") help="Error rate type. (default: %(default)s)")
parser.add_argument(
'--special_tokens',
type=str,
default='<SPOKEN_NOISE>',
help="Special tokens in scoring CER, seperated by space. "
"They shouldn't be splitted and should be treated as one special "
"character. Example: '<SPOKEN_NOISE> <bos> <eos>' "
"(default: %(default)s)")
parser.add_argument( parser.add_argument(
'--ref', type=str, required=True, help="The ground truth text.") '--ref', type=str, required=True, help="The ground truth text.")
parser.add_argument( parser.add_argument(
'--hyp', type=str, required=True, help="The decoding result.") '--hyp', type=str, required=True, help="The decoding result text.")
args = parser.parse_args() args = parser.parse_args()
return args return args
...@@ -31,6 +39,8 @@ if __name__ == '__main__': ...@@ -31,6 +39,8 @@ if __name__ == '__main__':
sum_errors, sum_ref_len = 0.0, 0 sum_errors, sum_ref_len = 0.0, 0
sent_cnt, not_in_ref_cnt = 0, 0 sent_cnt, not_in_ref_cnt = 0, 0
special_tokens = args.special_tokens.split(" ")
with open(args.ref, "r") as ref_txt: with open(args.ref, "r") as ref_txt:
line = ref_txt.readline() line = ref_txt.readline()
while line: while line:
...@@ -51,6 +61,8 @@ if __name__ == '__main__': ...@@ -51,6 +61,8 @@ if __name__ == '__main__':
continue continue
if args.error_rate_type == 'cer': if args.error_rate_type == 'cer':
for sp_tok in special_tokens:
sent = sent.replace(sp_tok, '\0')
errors, ref_len = char_errors( errors, ref_len = char_errors(
ref_dict[key].decode("utf8"), ref_dict[key].decode("utf8"),
sent.decode("utf8"), sent.decode("utf8"),
......
...@@ -9,3 +9,4 @@ log* ...@@ -9,3 +9,4 @@ log*
output* output*
pred pred
eval_tools eval_tools
box*
...@@ -93,7 +93,7 @@ tar -xf vgg_ilsvrc_16_fc_reduced.tar.gz && rm -f vgg_ilsvrc_16_fc_reduced.tar.gz ...@@ -93,7 +93,7 @@ tar -xf vgg_ilsvrc_16_fc_reduced.tar.gz && rm -f vgg_ilsvrc_16_fc_reduced.tar.gz
`train.py` 是训练模块的主要执行程序,调用示例如下: `train.py` 是训练模块的主要执行程序,调用示例如下:
```bash ```bash
python -u train.py --batch_size=16 --pretrained_model=vgg_ilsvrc_16_fc_reduced python -u train.py --batch_size=12 --pretrained_model=vgg_ilsvrc_16_fc_reduced
``` ```
- 可以通过设置 `export CUDA_VISIBLE_DEVICES=0,1,2,3` 指定想要使用的GPU数量。 - 可以通过设置 `export CUDA_VISIBLE_DEVICES=0,1,2,3` 指定想要使用的GPU数量。
- 更多的可选参数见: - 更多的可选参数见:
......
...@@ -16,14 +16,14 @@ add_arg = functools.partial(add_arguments, argparser=parser) ...@@ -16,14 +16,14 @@ add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable # yapf: disable
add_arg('parallel', bool, True, "Whether use multi-GPU/threads or not.") add_arg('parallel', bool, True, "Whether use multi-GPU/threads or not.")
add_arg('learning_rate', float, 0.001, "The start learning rate.") add_arg('learning_rate', float, 0.001, "The start learning rate.")
add_arg('batch_size', int, 16, "Minibatch size.") add_arg('batch_size', int, 12, "Minibatch size.")
add_arg('num_passes', int, 160, "Epoch number.") add_arg('num_passes', int, 160, "Epoch number.")
add_arg('use_gpu', bool, True, "Whether use GPU.") add_arg('use_gpu', bool, True, "Whether use GPU.")
add_arg('use_pyramidbox', bool, True, "Whether use PyramidBox model.") add_arg('use_pyramidbox', bool, True, "Whether use PyramidBox model.")
add_arg('model_save_dir', str, 'output', "The path to save model.") add_arg('model_save_dir', str, 'output', "The path to save model.")
add_arg('resize_h', int, 640, "The resized image height.") add_arg('resize_h', int, 640, "The resized image height.")
add_arg('resize_w', int, 640, "The resized image width.") add_arg('resize_w', int, 640, "The resized image width.")
add_arg('with_mem_opt', bool, True, "Whether to use memory optimization or not.") add_arg('with_mem_opt', bool, False, "Whether to use memory optimization or not.")
add_arg('pretrained_model', str, './vgg_ilsvrc_16_fc_reduced/', "The init model path.") add_arg('pretrained_model', str, './vgg_ilsvrc_16_fc_reduced/', "The init model path.")
#yapf: enable #yapf: enable
......
...@@ -24,15 +24,10 @@ def calc_diff(f1, f2): ...@@ -24,15 +24,10 @@ def calc_diff(f1, f2):
#print d2.shape #print d2.shape
#print d1[0, 0, 0:10, 0:10] #print d1[0, 0, 0:10, 0:10]
#print d2[0, 0, 0:10, 0:10] #print d2[0, 0, 0:10, 0:10]
#d1 = d1[:, :, 1:-2, 1:-2]
#d2 = d2[:, :, 1:-2, 1:-2]
d1 = d1.flatten() d1 = d1.flatten()
d2 = d2.flatten() d2 = d2.flatten()
#print d1[:10]
#print d2[:10]
d1_num = reduce(lambda x, y: x * y, d1.shape) d1_num = reduce(lambda x, y: x * y, d1.shape)
d2_num = reduce(lambda x, y: x * y, d2.shape) d2_num = reduce(lambda x, y: x * y, d2.shape)
if d1_num != d2_num: if d1_num != d2_num:
...@@ -41,7 +36,11 @@ def calc_diff(f1, f2): ...@@ -41,7 +36,11 @@ def calc_diff(f1, f2):
assert (d1_num == d2_num), "their shape is not consistent" assert (d1_num == d2_num), "their shape is not consistent"
try: try:
mask = np.abs(d1) >= np.abs(d2)
mask = mask.astype('int32')
df = np.abs(d1 - d2) df = np.abs(d1 - d2)
df = df / (1.0e-10 + np.abs(d1) * mask + np.abs(d2) * (1 - mask))
max_df = np.max(df) max_df = np.max(df)
sq_df = np.mean(df * df) sq_df = np.mean(df * df)
return max_df, sq_df return max_df, sq_df
......
...@@ -8,6 +8,12 @@ import axpy ...@@ -8,6 +8,12 @@ import axpy
import flatten import flatten
import argmax import argmax
import reshape import reshape
import roipooling
import priorbox
import permute
import detection_out
import normalize
import select
#custom layer import ends #custom layer import ends
......
""" A custom layer for 'detectionout' used in 'SSD' model to produce outputs
Note: Paddle's implementation of 'detectionout' applies 'flatten' and 'softmax' ops to the 'conf' input,
while Caffe's implementation does not. Hence, you should adjust the generated 'ssd.py' to remove the 'softmax' and 'flatten' ops applied to the 'conf' input.
"""
from .register import register
def detectionoutput_shape(input_shape):
    """ report the output shape of a 'detectionout' layer

    The result does not depend on 'input_shape': the leading dimension is
    dynamic and therefore reported as -1.

    Args:
        @input_shape (list of int): input shape (not consulted)

    Returns:
        @output_shape (list of num): always [-1, 6]
    """
    return [-1, 6]
def detectionoutput_layer(inputs,
                          name,
                          background_label=0,
                          share_location=True,
                          nms_param=None,
                          keep_top_k=100,
                          confidence_threshold=0.1):
    """ build a layer of type 'detectionout' using fluid

    Args:
        @inputs (list of variables): input fluid variables for this layer,
            read as [loc, conf, priorbox]
        @name (str): name for this layer (not used by this function)
        @background_label (int): forwarded as 'background_label'
        @share_location (bool): accepted but not used by this function
        @nms_param (dict): optional NMS settings with the keys
            'nms_threshold', 'top_k' and 'eta'; any missing key falls back
            to 0.3, 10 and 1.0 respectively. The caller's dict is not modified.
        @keep_top_k (int): forwarded as 'keep_top_k'
        @confidence_threshold (float): forwarded as 'score_threshold'

    Returns:
        output (variable): output variable for this layer
    """
    import paddle.fluid as fluid

    # Merge the caller's settings over the defaults on a private copy, so the
    # argument is never mutated and no Python-2-only 'has_key' call is needed.
    nms_settings = {"nms_threshold": 0.3, "top_k": 10, "eta": 1.0}
    if nms_param is not None:
        nms_settings.update(nms_param)

    mbox_conf_flatten = inputs[1]
    mbox_priorbox = inputs[2]

    # The priorbox input is split in two along dim 1 (boxes, then variances);
    # each half is reshaped to rows of 4 values.
    pb, pbv = fluid.layers.split(mbox_priorbox, 2, dim=1)[0:2]
    pb = fluid.layers.reshape(x=pb, shape=[-1, 4])
    pbv = fluid.layers.reshape(x=pbv, shape=[-1, 4])

    # Reshape the locations so that dim 1 matches dim 1 of the conf input.
    mbox_loc = fluid.layers.reshape(
        x=inputs[0], shape=[-1, mbox_conf_flatten.shape[1], 4])

    nmsed_outs = fluid.layers.detection_output(
        scores=mbox_conf_flatten,
        loc=mbox_loc,
        prior_box=pb,
        prior_box_var=pbv,
        background_label=background_label,
        nms_threshold=nms_settings["nms_threshold"],
        nms_top_k=nms_settings["top_k"],
        keep_top_k=keep_top_k,
        score_threshold=confidence_threshold,
        nms_eta=nms_settings["eta"])
    return nmsed_outs
# Register this module's shape and layer functions under the kind 'DetectionOutput'.
register(
kind='DetectionOutput',
shape=detectionoutput_shape,
layer=detectionoutput_layer)
...@@ -4,11 +4,6 @@ ...@@ -4,11 +4,6 @@
from .register import register from .register import register
def import_fluid():
import paddle.fluid as fluid
return fluid
def flatten_shape(input_shape, axis=1, end_axis=-1): def flatten_shape(input_shape, axis=1, end_axis=-1):
""" calculate the output shape of this layer using input shape """ calculate the output shape of this layer using input shape
...@@ -28,7 +23,7 @@ def flatten_shape(input_shape, axis=1, end_axis=-1): ...@@ -28,7 +23,7 @@ def flatten_shape(input_shape, axis=1, end_axis=-1):
start_axis += len(input_shape) start_axis += len(input_shape)
if end_axis < 0: if end_axis < 0:
end_axis += len(input_shape) end_axis += len(input_shape) + 1
assert start_axis <= end_axis, 'invalid axis[%d] or end_axis[%d] params'\ assert start_axis <= end_axis, 'invalid axis[%d] or end_axis[%d] params'\
% (start_axis, end_axis) % (start_axis, end_axis)
...@@ -52,18 +47,16 @@ def flatten_layer(input, name, axis=1, end_axis=-1): ...@@ -52,18 +47,16 @@ def flatten_layer(input, name, axis=1, end_axis=-1):
Returns: Returns:
output (variable): output variable for this layer output (variable): output variable for this layer
""" """
fluid = import_fluid() import paddle.fluid as fluid
input_shape = list(input.shape) input_shape = list(input.shape)
dims = len(input_shape)
start_axis = axis if axis >= 0 else axis + dims
end_axis = end_axis if end_axis >= 0 else end_axis + dims
assert start_axis <= end_axis, 'invalid axis or end_axis params' if input_shape[0] == -1:
output_shape = input_shape[0:start_axis] input_shape[0] = 1
flat_sz = reduce(lambda a, b: a * b, input_shape[start_axis:end_axis]) output_shape = flatten_shape(input_shape, axis=axis, end_axis=end_axis)
output_shape += [flat_sz] output_shape[0] = -1
output_shape += input_shape[end_axis:-1] else:
output_shape = flatten_shape(input_shape, axis=axis, end_axis=end_axis)
output = fluid.layers.reshape(input, shape=output_shape, name=name) output = fluid.layers.reshape(input, shape=output_shape, name=name)
......
""" A custom layer for 'normalize' op
"""
from .register import register
def normalize_shape(input_shape,
                    across_spatial=True,
                    scale_filler=True,
                    eps=1e-10):
    """ calculate the output shape of this layer using input shapes

    The output shape equals the input shape; the very same object that was
    passed in is handed back.

    Args:
        @input_shape (list of tuples): input shape
        @across_spatial, @scale_filler, @eps: not consulted here

    Returns:
        @output_shape (list of num): a list of numbers represent the output shape
    """
    return input_shape
def normalize_layer(input,
                    name,
                    across_spatial=True,
                    scale_filler=True,
                    channel_shared=False,
                    eps=1e-10):
    """ build a layer of type 'normalize' using fluid

    The input is l2-normalized along axis 1 and then multiplied element-wise
    by a scale parameter created with 'create_parameter'.

    Args:
        @input (variable): input fluid variable for this layer
        @name (str): name for this layer
        @across_spatial (bool): must be False, otherwise the assertion fails
        @scale_filler: not consulted here
        @channel_shared (bool): when True the scale parameter has shape [1],
            otherwise [input.shape[1]]
        @eps (float): not consulted here

    Returns:
        output (variable): output variable for this layer
    """
    import paddle.fluid as fluid

    # the scale parameter is named after the part of 'name' before the first '.'
    scale_name = name.split('.')[0] + '_scale'
    assert across_spatial == False, "Only support across_spatial == False for Normalize[%s]" % (
        name)

    normalized = fluid.layers.l2_normalize(input, axis=1)

    if channel_shared:
        scale_shape, mul_axis = [1], -1
    else:
        scale_shape, mul_axis = [input.shape[1]], 1

    scale = fluid.layers.create_parameter(
        shape=scale_shape,
        dtype=input.dtype,
        name=name,
        attr=fluid.ParamAttr(name=scale_name))
    return fluid.layers.elementwise_mul(x=normalized, y=scale, axis=mul_axis)
# Register this module's shape and layer functions under the kind 'Normalize'.
register(kind='Normalize', shape=normalize_shape, layer=normalize_layer)
""" A custom layer for 'Permute' which is equivalent to transpose in paddle
"""
from .register import register
def permute_shape(input_shape, order):
    """ calculate the output shape of this layer using input shapes

    Args:
        @input_shape (list of numbers): input shape
        @order (list of int): for each output dim, the index of the input dim
            it is taken from

    Returns:
        @output_shape (list of num): a list of numbers represent the output shape

    Raises:
        AssertionError: if an entry of 'order' is not smaller than the number
            of input dims
    """
    rank = len(input_shape)
    output_shape = []
    for ii in order:
        # The message must only use names that exist in this function;
        # otherwise a bad order surfaces as NameError instead.
        assert ii < rank, \
            "invalid order for permute: index %d is out of range for %d dims" % (
                ii, rank)
        output_shape.append(input_shape[ii])
    return output_shape
def permute_layer(input, name, order):
    """ build a layer of type 'permute' using fluid

    This is a direct call to fluid's 'transpose' with 'order' as the
    permutation.

    Args:
        @input (input variable): input fluid variables for this layer
        @name (str): name for this layer
        @order (list of int): order to permute the dims

    Returns:
        output (variable): output variable for this layer
    """
    import paddle.fluid as fluid

    return fluid.layers.transpose(input, order, name=name)
# Register this module's shape and layer functions under the kind 'Permute'.
register(kind='Permute', shape=permute_shape, layer=permute_layer)
""" A custom layer for 'priorbox' which is used in ssd to generate prior box info
Since the order of prior boxes is different between caffe and paddle,
we use 'slice' and 'concat' ops to align them.
"""
from .register import register
def priorbox_shape(input_shapes, min_size, max_size=None, aspect_ratio=None):
    """ calculate the output shape of this layer using input shapes

    Args:
        @input_shapes (list of tuples): a list of exactly two input shapes;
            only dims 2 and 3 of the first one are used
        @min_size: not consulted here
        @max_size: when not None, one more box per position is counted
        @aspect_ratio (list): when not None, two more boxes per position are
            counted for every entry

    Returns:
        @output_shape (list of num): a list of numbers represent the output shape

    Raises:
        AssertionError: if 'input_shapes' does not hold exactly two shapes
    """
    # The message must only use names that exist in this function; otherwise
    # a wrong input count surfaces as NameError instead.
    assert len(input_shapes) == 2, \
        "invalid inputs for Priorbox: expected 2 input shapes, got %d" % (
            len(input_shapes))
    fc_shape = input_shapes[0]

    boxes_per_position = 1
    if max_size is not None:
        boxes_per_position += 1
    if aspect_ratio is not None:
        boxes_per_position += 2 * len(aspect_ratio)

    n_bbx = fc_shape[2] * fc_shape[3] * boxes_per_position
    # 4 values per box; dim 1 has size 2
    return [1, 2, 4 * n_bbx]
def priorbox_layer(inputs,
                   name,
                   min_size,
                   step,
                   max_size=None,
                   aspect_ratio=None,
                   flip=True,
                   clip=False,
                   variance=[],
                   offset=0.5):
    """ build a layer of type 'Priorbox' using fluid

    Calls fluid's 'prior_box', flattens the returned boxes and variances to
    shape [1, 1, -1] each, and concatenates them along axis 1.

    Args:
        @inputs (list of variables): exactly two input fluid variables for
            this layer; both are forwarded to 'prior_box' in the given order
        @name (str): name for this layer (only used in the assertion message)
        @min_size, @max_size, @aspect_ratio, @variance, @flip, @clip, @offset:
            forwarded positionally to 'prior_box'
        @step: forwarded as the pair (step, step)

    Returns:
        output (variable): output variable for this layer
    """
    import paddle.fluid as fluid
    assert len(inputs) == 2, "invalid inputs for Priorbox[%s]" % (name)
    input = inputs[0]
    image = inputs[1]
    # All arguments except the last are positional, so their order must match
    # the 'prior_box' signature exactly.
    # NOTE(review): (step, step) presumably lands on the 'steps' parameter -
    # confirm against the installed fluid version.
    box, variance_ = fluid.layers.prior_box(
        input,
        image,
        min_size,
        max_size,
        aspect_ratio,
        variance,
        flip,
        clip, (step, step),
        offset,
        min_max_aspect_ratios_order=True)
    # The bare string below is disabled legacy code (it is never executed);
    # it re-ordered the boxes by hand and is kept verbatim for reference.
    """
#adjust layout when the output is not consistent with caffe's
feat_shape = list(input.shape)
H = feat_shape[2]
W = feat_shape[3]
box_tmp = fluid.layers.reshape(box, [H, W, -1, 4])
nb_prior_bbx = int(box_tmp.shape[2])
tensor_list = fluid.layers.split(box_tmp, nb_prior_bbx, 2)
#TODO:
# current implementation for this layer is not efficient
# and we should fix this bug in future when Paddle support the same prior-box layout with Caffe
index_list = [0]
index_list = index_list * nb_prior_bbx
index_offset = 0
if max_size is not None:
index_list[1] = -1
index_offset = 1
for ii in xrange(2 * len(aspect_ratio)):
index_list[ii + 1 + index_offset] = ii + 1
tensor_list_gathered = [tensor_list[ii] for ii in index_list]
caffe_prior_bbx = fluid.layers.concat(tensor_list_gathered, axis=2)
box = fluid.layers.reshape(caffe_prior_bbx, [1, 1, -1])
"""
    # Flatten boxes and variances, then stack them along axis 1.
    box = fluid.layers.reshape(box, [1, 1, -1])
    variance_ = fluid.layers.reshape(variance_, [1, 1, -1])
    output = fluid.layers.concat([box, variance_], axis=1)
    return output
# Register this module's shape and layer functions under the kind 'PriorBox'.
register(kind='PriorBox', shape=priorbox_shape, layer=priorbox_layer)
...@@ -68,15 +68,23 @@ def reshape_shape(input_sp, shape, axis=0, num_axes=-1): ...@@ -68,15 +68,23 @@ def reshape_shape(input_sp, shape, axis=0, num_axes=-1):
top_dim = shape['dim'][i] top_dim = shape['dim'][i]
if top_dim == 0: if top_dim == 0:
copy_axes.append(i) copy_axes.append(i)
copy_axis_index = start_axis + i
output_shape[copy_axis_index] = input_shape[copy_axis_index]
elif top_dim == -1: elif top_dim == -1:
assert inferred_axis == -1, "[Reshape]new shape contains multiple -1 dims" assert inferred_axis == -1, "[Reshape]new shape contains multiple -1 dims"
inferred_axis = i
else: else:
constant_count *= top_dim constant_count *= top_dim
if inferred_axis >= 0: if inferred_axis >= 0:
explicit_count = constant_count explicit_count = constant_count
explicit_count *= count(input_shape[0:start_axis]) l = input_shape[0:start_axis]
explicit_count *= count(input_shape[end_axis:]) if len(l) > 0:
explicit_count *= count(l)
l = input_shape[end_axis:]
if len(l) > 0:
explicit_count *= count(l)
for i in range(len(copy_axes)): for i in range(len(copy_axes)):
explicit_count *= output_shape[start_axis + copy_axes[i]] explicit_count *= output_shape[start_axis + copy_axes[i]]
...@@ -84,6 +92,7 @@ def reshape_shape(input_sp, shape, axis=0, num_axes=-1): ...@@ -84,6 +92,7 @@ def reshape_shape(input_sp, shape, axis=0, num_axes=-1):
assert input_count % explicit_count == 0, "[Reshape]botom count[%d] "\ assert input_count % explicit_count == 0, "[Reshape]botom count[%d] "\
"must be divisible by product of the specified dimensions[%d] "\ "must be divisible by product of the specified dimensions[%d] "\
% (input_count, explicit_count) % (input_count, explicit_count)
output_shape[start_axis + inferred_axis] = input_count / explicit_count
output_count = count(output_shape) output_count = count(output_shape)
assert output_count == input_count, "[Reshape]output count[%d] must match input count[%d]" % ( assert output_count == input_count, "[Reshape]output count[%d] must match input count[%d]" % (
...@@ -117,6 +126,7 @@ def reshape_layer(input, name, shape, axis=0, num_axes=-1): ...@@ -117,6 +126,7 @@ def reshape_layer(input, name, shape, axis=0, num_axes=-1):
output_shape = reshape_shape(input_shape, shape, axis, num_axes) output_shape = reshape_shape(input_shape, shape, axis, num_axes)
output = fluid.layers.reshape(input, shape=output_shape, name=name) output = fluid.layers.reshape(input, shape=output_shape, name=name)
return output return output
......
""" a custom layer for 'ROIPooling', maybe we should implement this in standard way.
more info can be found here: http://caffe.berkeleyvision.org/tutorial/layers/ROIPooling.html
"""
from .register import register
def roipooling_shape(input_shapes, pooled_h, pooled_w, spatial_scale):
    """ calculate the output shape of this layer using input shape

    The output keeps dim 1 of the feature shape, takes dim 0 from the rois
    shape, and uses the pooled height and width for dims 2 and 3.

    Args:
        @input_shapes (list of shapes): exactly two shapes, read as
            [feature shape, rois shape]
        @pooled_h (int): output size of dim 2
        @pooled_w (int): output size of dim 3
        @spatial_scale (float): not consulted here

    Returns:
        @output_shape (list of num): a list of numbers represent the output shape

    Raises:
        AssertionError: if 'input_shapes' does not hold exactly two shapes
    """
    assert len(input_shapes) == 2, "not valid input shape for roipooling layer"
    base_fea_shape = input_shapes[0]
    rois_shape = input_shapes[1]

    # Work on a list copy: the caller's shape must not be modified, and
    # tuple shapes do not support item assignment.
    output_shape = list(base_fea_shape)
    output_shape[0] = rois_shape[0]
    output_shape[2] = pooled_h
    output_shape[3] = pooled_w
    return output_shape
def roipooling_layer(inputs, name, pooled_h, pooled_w, spatial_scale):
    """ build a layer of type 'ROIPooling' using fluid

    Args:
        @inputs (list of variables): exactly two input fluid variables, read
            as [feature map, rois]
        @name (str): name for this layer (not used by this function)
        @pooled_h (int): forwarded to 'roi_pool'
        @pooled_w (int): forwarded to 'roi_pool'
        @spatial_scale (float): forwarded to 'roi_pool'

    Returns:
        output (variable): output variable for this layer
    """
    import paddle.fluid as fluid

    assert len(inputs) == 2, "not valid input shape for roipooling layer"
    feature_map, raw_rois = inputs[0], inputs[1]

    # Only columns 1..4 of every roi row are forwarded; column 0 is dropped
    # (presumably a batch index in Caffe's roi layout - confirm).
    boxes = raw_rois[:, 1:5]
    return fluid.layers.roi_pool(feature_map, boxes, pooled_h, pooled_w,
                                 spatial_scale)
# Register this module's shape and layer functions under the kind 'ROIPooling'.
register(kind='ROIPooling', shape=roipooling_shape, layer=roipooling_layer)
""" a custom layer for 'select' which is used to replace standard 'Slice' layer
for converting layer with multiple different output tensors
"""
from .register import register
def select_shape(input_shape, slice_point, axis=1):
    """ calculate the output shape of this layer using input shape

    The result is a list copy of 'input_shape' whose 'axis' entry is replaced
    by the length of the selected range.

    Args:
        @input_shape (list of num): a list of number which represents the input shape
        @slice_point (list): [start, end]; when it does not have exactly two
            entries, the range ends at the size of 'axis'
        @axis (int): the dim that is sliced

    Returns:
        @output_shape (list of num): a list of numbers represent the output shape
    """
    dims = list(input_shape)
    begin = slice_point[0]
    stop = slice_point[1] if len(slice_point) == 2 else dims[axis]
    assert stop > begin, "invalid slice_point with [start:%d, end:%d]"\
        % (begin, stop)

    dims[axis] = stop - begin
    return dims
def select_layer(input, name, slice_point, axis=1):
    """ build a layer of type 'Slice' using fluid

    The input is split along 'axis' into up to three sections (before the
    range, the range itself, after the range); the section holding the range
    is returned.

    Args:
        @input (variable): input fluid variable for this layer
        @name (str): name for this layer
        @slice_point (list): [start, end]; when it does not have exactly two
            entries, the range ends at the size of 'axis'
        @axis (int): the dim that is sliced

    Returns:
        output (variable): output variable for this layer
    """
    import paddle.fluid as fluid

    dims = list(input.shape)
    begin = slice_point[0]
    axis_len = dims[axis]
    stop = slice_point[1] if len(slice_point) == 2 else axis_len

    # A leading / trailing section only exists when the range does not touch
    # the respective end of the axis.
    leading = [begin] if begin > 0 else []
    trailing = [axis_len - stop] if stop != axis_len else []
    sections = leading + [stop - begin] + trailing

    pieces = fluid.layers.split(input, sections, dim=axis, name=name)
    # the wanted section sits right after the optional leading one
    return pieces[len(leading)]
# Register this module's shape and layer functions under the kind 'Select'.
register(kind='Select', shape=select_shape, layer=select_layer)
...@@ -16,7 +16,7 @@ LAYER_DESCRIPTORS = { ...@@ -16,7 +16,7 @@ LAYER_DESCRIPTORS = {
'Concat': shape_concat, 'Concat': shape_concat,
'ContrastiveLoss': shape_scalar, 'ContrastiveLoss': shape_scalar,
'Convolution': shape_convolution, 'Convolution': shape_convolution,
'Deconvolution': shape_not_implemented, 'Deconvolution': shape_deconvolution,
'Data': shape_data, 'Data': shape_data,
'Dropout': shape_identity, 'Dropout': shape_identity,
'DummyData': shape_data, 'DummyData': shape_data,
...@@ -39,6 +39,7 @@ LAYER_DESCRIPTORS = { ...@@ -39,6 +39,7 @@ LAYER_DESCRIPTORS = {
'Pooling': shape_pool, 'Pooling': shape_pool,
'Power': shape_identity, 'Power': shape_identity,
'ReLU': shape_identity, 'ReLU': shape_identity,
'PReLU': shape_identity,
'Scale': shape_identity, 'Scale': shape_identity,
'Sigmoid': shape_identity, 'Sigmoid': shape_identity,
'SigmoidCrossEntropyLoss': shape_scalar, 'SigmoidCrossEntropyLoss': shape_scalar,
...@@ -179,6 +180,11 @@ class LayerAdapter(object): ...@@ -179,6 +180,11 @@ class LayerAdapter(object):
@property @property
def parameters(self): def parameters(self):
name = NodeDispatch.get_handler_name(self.kind) name = NodeDispatch.get_handler_name(self.kind)
if self.kind.lower() == "normalize":
name = "norm"
elif self.kind.lower() == "deconvolution":
name = "convolution"
name = '_'.join((name, 'param')) name = '_'.join((name, 'param'))
try: try:
return getattr(self.layer, name) return getattr(self.layer, name)
...@@ -207,7 +213,9 @@ class LayerAdapter(object): ...@@ -207,7 +213,9 @@ class LayerAdapter(object):
@property @property
def kernel_parameters(self): def kernel_parameters(self):
assert self.kind in (NodeKind.Convolution, NodeKind.Pooling) assert self.kind in (NodeKind.Convolution, NodeKind.Pooling,\
NodeKind.Deconvolution)
params = self.parameters params = self.parameters
k_h = self.get_kernel_value(params.kernel_h, params.kernel_size, 0) k_h = self.get_kernel_value(params.kernel_h, params.kernel_size, 0)
k_w = self.get_kernel_value(params.kernel_w, params.kernel_size, 1) k_w = self.get_kernel_value(params.kernel_w, params.kernel_size, 1)
...@@ -217,9 +225,25 @@ class LayerAdapter(object): ...@@ -217,9 +225,25 @@ class LayerAdapter(object):
params.stride_w, params.stride, 1, default=1) params.stride_w, params.stride, 1, default=1)
p_h = self.get_kernel_value(params.pad_h, params.pad, 0, default=0) p_h = self.get_kernel_value(params.pad_h, params.pad, 0, default=0)
p_w = self.get_kernel_value(params.pad_w, params.pad, 1, default=0) p_w = self.get_kernel_value(params.pad_w, params.pad, 1, default=0)
return KernelParameters(k_h, k_w, s_h, s_w, p_h, p_w)
KernelParameters = namedtuple('KernelParameters', [ dila_h = dila_w = 1
'kernel_h', 'kernel_w', 'stride_h', 'stride_w', 'pad_h', 'pad_w' if self.kind in (NodeKind.Convolution, NodeKind.Deconvolution):
]) dila_len = len(params.dilation)
if dila_len == 2:
dila_h = params.dilation[0]
dila_w = params.dilation[1]
elif dila_len == 1:
dila_h = dila_w = params.dilation[0]
else:
assert dila_len == 0, "invalid length[%s] of dilation in convolution" % (
dila_len)
return KernelParameters(k_h, k_w, s_h, s_w, p_h, p_w, dila_h, dila_w)
KernelParameters = namedtuple(
'KernelParameters',
[
'kernel_h', 'kernel_w', 'stride_h', 'stride_w', 'pad_h', 'pad_w',
'dila_h', 'dila_w'
], )
...@@ -91,7 +91,7 @@ class Network(object): ...@@ -91,7 +91,7 @@ class Network(object):
name = '%s_%s' % (op_name, param_name) name = '%s_%s' % (op_name, param_name)
v = fluid.global_scope().find_var(name) v = fluid.global_scope().find_var(name)
w = v.get_tensor() w = v.get_tensor()
w.set(data, place) w.set(data.reshape(w.shape()), place)
except ValueError: except ValueError:
if not ignore_missing: if not ignore_missing:
raise raise
...@@ -144,6 +144,7 @@ class Network(object): ...@@ -144,6 +144,7 @@ class Network(object):
relu=True, relu=True,
relu_negative_slope=0.0, relu_negative_slope=0.0,
padding=None, padding=None,
dilation=1,
group=1, group=1,
biased=True): biased=True):
if padding is None: if padding is None:
...@@ -173,6 +174,7 @@ class Network(object): ...@@ -173,6 +174,7 @@ class Network(object):
num_filters=c_o, num_filters=c_o,
stride=[s_h, s_w], stride=[s_h, s_w],
padding=padding, padding=padding,
dilation=dilation,
groups=group, groups=group,
param_attr=fluid.ParamAttr(name=prefix + "weights"), param_attr=fluid.ParamAttr(name=prefix + "weights"),
bias_attr=fluid.ParamAttr(name=prefix + "biases"), bias_attr=fluid.ParamAttr(name=prefix + "biases"),
...@@ -183,13 +185,71 @@ class Network(object): ...@@ -183,13 +185,71 @@ class Network(object):
return output return output
@layer
def deconv(self,
           input,
           k_h,
           k_w,
           c_o,
           s_h,
           s_w,
           name,
           relu=True,
           relu_negative_slope=0.0,
           padding=None,
           dilation=1,
           biased=True):
    """Build a 'conv2d_transpose' layer using fluid.

    Args:
        input (variable): input fluid variable; its shape must have 4 dims
        k_h, k_w (int): filter height and width
        c_o (int): number of filters
        s_h, s_w (int): stride along height and width
        name (str): layer name; also the prefix of the weight/bias parameters
        relu (bool): when False no activation is applied; otherwise 'relu',
            or leaky relu when 'relu_negative_slope' is non-zero
        relu_negative_slope (float): slope passed to 'leaky_relu'
        padding (list): [pad_h, pad_w]; defaults to [0, 0]
        dilation (int or list/tuple): a single value for both directions or
            a (dilation_h, dilation_w) pair
        biased (bool): accepted but not consulted by this method

    Returns:
        output (variable): output variable for this layer
    """
    if padding is None:
        padding = [0, 0]

    # Only the spatial size of the input is needed; the channel count is not.
    _, h_i, w_i = input.shape[1:]
    fluid = import_fluid()
    prefix = name + '_'

    leaky_relu = False
    act = 'relu'
    if relu is False:
        act = None
    elif relu_negative_slope != 0.0:
        # leaky relu is applied as a separate op after the deconvolution
        leaky_relu = True
        act = None

    # 'dilation' may arrive as a (h, w) pair; multiplying a tuple by an int
    # would repeat the tuple, so split it into per-direction scalars first.
    if isinstance(dilation, (list, tuple)):
        if len(dilation) == 1:
            d_h = d_w = dilation[0]
        else:
            d_h, d_w = dilation
    else:
        d_h = d_w = dilation

    p_h = padding[0]
    p_w = padding[1]
    h_o = (h_i - 1) * s_h - 2 * p_h + d_h * (k_h - 1) + 1
    w_o = (w_i - 1) * s_w - 2 * p_w + d_w * (k_w - 1) + 1

    output = fluid.layers.conv2d_transpose(
        name=self.get_unique_output_name(name, 'conv2d_transpose'),
        input=input,
        num_filters=c_o,
        output_size=[h_o, w_o],
        filter_size=[k_h, k_w],
        padding=padding,
        stride=[s_h, s_w],
        dilation=dilation,
        param_attr=fluid.ParamAttr(name=prefix + "weights"),
        bias_attr=fluid.ParamAttr(name=prefix + "biases"),
        act=act)

    if leaky_relu:
        output = fluid.layers.leaky_relu(output, alpha=relu_negative_slope)
    return output
@layer @layer
def relu(self, input, name): def relu(self, input, name):
fluid = import_fluid() fluid = import_fluid()
output = fluid.layers.relu( output = fluid.layers.relu(input)
name=self.get_unique_output_name(name, 'relu'), x=input)
return output return output
@layer
def prelu(self, input, channel_shared, name):
    """Placeholder for the 'prelu' layer; always raises NotImplementedError."""
    # Disabled draft of an implementation, kept for reference:
    #fluid = import_fluid()
    #output = fluid.layers.relu(input)
    #return output
    raise NotImplementedError('prelu not implemented')
def pool(self, pool_type, input, k_h, k_w, s_h, s_w, ceil_mode, padding, def pool(self, pool_type, input, k_h, k_w, s_h, s_w, ceil_mode, padding,
name): name):
# Get the number of channels in the input # Get the number of channels in the input
...@@ -256,6 +316,12 @@ class Network(object): ...@@ -256,6 +316,12 @@ class Network(object):
return fluid.layers.sigmoid( return fluid.layers.sigmoid(
input, name=self.get_unique_output_name(name, 'sigmoid')) input, name=self.get_unique_output_name(name, 'sigmoid'))
@layer
def tanh(self, input, name):
    """Build a 'tanh' layer using fluid, named via get_unique_output_name."""
    fluid = import_fluid()
    output_name = self.get_unique_output_name(name, 'tanh')
    return fluid.layers.tanh(input, name=output_name)
@layer @layer
def lrn(self, input, radius, alpha, beta, name, bias=1.0): def lrn(self, input, radius, alpha, beta, name, bias=1.0):
fluid = import_fluid() fluid = import_fluid()
...@@ -322,7 +388,8 @@ class Network(object): ...@@ -322,7 +388,8 @@ class Network(object):
name, name,
scale_offset=True, scale_offset=True,
eps=1e-5, eps=1e-5,
relu=False): relu=False,
relu_negative_slope=0.0):
# NOTE: Currently, only inference is supported # NOTE: Currently, only inference is supported
fluid = import_fluid() fluid = import_fluid()
prefix = name + '_' prefix = name + '_'
...@@ -332,6 +399,15 @@ class Network(object): ...@@ -332,6 +399,15 @@ class Network(object):
name=prefix + 'offset') name=prefix + 'offset')
mean_name = prefix + 'mean' mean_name = prefix + 'mean'
variance_name = prefix + 'variance' variance_name = prefix + 'variance'
leaky_relu = False
act = 'relu'
if relu is False:
act = None
elif relu_negative_slope != 0.0:
leaky_relu = True
act = None
output = fluid.layers.batch_norm( output = fluid.layers.batch_norm(
name=self.get_unique_output_name(name, 'batch_norm'), name=self.get_unique_output_name(name, 'batch_norm'),
input=input, input=input,
...@@ -341,7 +417,10 @@ class Network(object): ...@@ -341,7 +417,10 @@ class Network(object):
moving_mean_name=mean_name, moving_mean_name=mean_name,
moving_variance_name=variance_name, moving_variance_name=variance_name,
epsilon=eps, epsilon=eps,
act='relu' if relu is True else None) act=act)
if leaky_relu:
output = fluid.layers.leaky_relu(output, alpha=relu_negative_slope)
return output return output
......
...@@ -9,21 +9,6 @@ from ..transformers import (DataInjector, DataReshaper, NodeRenamer, ...@@ -9,21 +9,6 @@ from ..transformers import (DataInjector, DataReshaper, NodeRenamer,
from . import network from . import network
def get_padding_type(kernel_params, input_shape, output_shape):
'''Translates Caffe's numeric padding to one of ('SAME', 'VALID').
Caffe supports arbitrary padding values, while Paddle only
supports 'SAME' and 'VALID' modes. So, not all Caffe paddings
can be translated to Paddle. There are some subtleties to
how the padding edge-cases are handled. These are described here:
https://github.com/Yangqing/caffe2/blob/master/caffe2/proto/caffe2_legacy.proto
'''
k_h, k_w, s_h, s_w, p_h, p_w = kernel_params
if p_h > 0 or p_w > 0:
return [p_h, p_w]
else:
return None
class PaddleNode(object): class PaddleNode(object):
'''An intermediate representation for Paddle operations.''' '''An intermediate representation for Paddle operations.'''
...@@ -78,10 +63,11 @@ class PaddleMapper(NodeMapper): ...@@ -78,10 +63,11 @@ class PaddleMapper(NodeMapper):
def get_kernel_params(self, node): def get_kernel_params(self, node):
kernel_params = node.layer.kernel_parameters kernel_params = node.layer.kernel_parameters
input_shape = node.get_only_parent().output_shape input_shape = node.get_only_parent().output_shape
padding = get_padding_type(kernel_params, input_shape, padding = [kernel_params.pad_h, kernel_params.pad_w]
node.output_shape) if padding[0] == 0 and padding[1] == 0:
# Only emit the padding if it's not the default value. padding = {}
padding = {'padding': padding} if padding is not None else {} else:
padding = {'padding': padding}
return (kernel_params, padding) return (kernel_params, padding)
def map_convolution(self, node): def map_convolution(self, node):
...@@ -95,15 +81,44 @@ class PaddleMapper(NodeMapper): ...@@ -95,15 +81,44 @@ class PaddleMapper(NodeMapper):
kwargs['group'] = group kwargs['group'] = group
if not node.parameters.bias_term: if not node.parameters.bias_term:
kwargs['biased'] = False kwargs['biased'] = False
if kernel_params.dila_h != 1 or kernel_params.dila_w != 1:
kwargs['dilation'] = (kernel_params.dila_h, kernel_params.dila_w)
assert kernel_params.kernel_h == h assert kernel_params.kernel_h == h
assert kernel_params.kernel_w == w assert kernel_params.kernel_w == w
return MaybeActivated(node)( return MaybeActivated(node)(
'conv', kernel_params.kernel_h, kernel_params.kernel_w, c_o, 'conv', kernel_params.kernel_h, kernel_params.kernel_w, c_o,
kernel_params.stride_h, kernel_params.stride_w, **kwargs) kernel_params.stride_h, kernel_params.stride_w, **kwargs)
def map_deconvolution(self, node):
    '''Build the 'deconv' Paddle operation for a deconvolution node.

    The kernel size and strides are taken from the node's kernel
    parameters and the number of output channels from index 1 of
    ``node.output_shape``. Optional keyword arguments are only emitted
    when they differ from the defaults: the padding returned by
    ``get_kernel_params``, ``biased=False`` when the layer has no bias
    term, and ``dilation`` when either dilation factor is not 1.
    '''
    (kernel_params, kwargs) = self.get_kernel_params(node)
    c_o = node.output_shape[1]
    if not node.parameters.bias_term:
        kwargs['biased'] = False
    if kernel_params.dila_h != 1 or kernel_params.dila_w != 1:
        kwargs['dilation'] = (kernel_params.dila_h, kernel_params.dila_w)
    # The input channel count was computed but never used, and the kernel
    # size was only asserted against itself, so both have been dropped.
    return MaybeActivated(node)(
        'deconv', kernel_params.kernel_h, kernel_params.kernel_w, c_o,
        kernel_params.stride_h, kernel_params.stride_w, **kwargs)
def map_relu(self, node): def map_relu(self, node):
return PaddleNode('relu') return PaddleNode('relu')
def map_prelu(self, node):
    '''Translate a PReLU node, forwarding its ``channel_shared`` flag.'''
    # Parameters without a channel_shared attribute count as not shared.
    shared = getattr(node.parameters, 'channel_shared', False)
    prelu_node = PaddleNode('prelu', shared)
    return prelu_node
def map_tanh(self, node):
    '''Translate a TanH node; no argument is taken from ``node``.'''
    tanh_node = PaddleNode('tanh')
    return tanh_node
def map_pooling(self, node): def map_pooling(self, node):
pool_type = node.parameters.pool pool_type = node.parameters.pool
if pool_type == 0: if pool_type == 0:
......
...@@ -6,6 +6,8 @@ from .errors import KaffeError ...@@ -6,6 +6,8 @@ from .errors import KaffeError
Tensor4DShape = namedtuple('Tensor4DShape', Tensor4DShape = namedtuple('Tensor4DShape',
['batch_size', 'channels', 'height', 'width']) ['batch_size', 'channels', 'height', 'width'])
Tensor3DShape = namedtuple('Tensor3DShape', ['batch_size', 'data1', 'data2'])
Tensor2DShape = namedtuple('Tensor2DShape', ['batch_size', 'data']) Tensor2DShape = namedtuple('Tensor2DShape', ['batch_size', 'data'])
ScalarShape = namedtuple('ScalarShape', ['batch_size']) ScalarShape = namedtuple('ScalarShape', ['batch_size'])
...@@ -14,6 +16,8 @@ ScalarShape = namedtuple('ScalarShape', ['batch_size']) ...@@ -14,6 +16,8 @@ ScalarShape = namedtuple('ScalarShape', ['batch_size'])
def make_tensor(batch_size, d1=None, d2=None, d3=None): def make_tensor(batch_size, d1=None, d2=None, d3=None):
if d3 is not None: if d3 is not None:
return Tensor4DShape(batch_size, d1, d2, d3) return Tensor4DShape(batch_size, d1, d2, d3)
elif d1 is not None and d2 is not None:
return Tensor3DShape(batch_size, d1, d2)
elif d1 is not None and d2 is None: elif d1 is not None and d2 is None:
return Tensor2DShape(batch_size, d1) return Tensor2DShape(batch_size, d1)
elif d1 is None and d2 is None and d3 is None: elif d1 is None and d2 is None and d3 is None:
...@@ -24,10 +28,14 @@ def make_tensor(batch_size, d1=None, d2=None, d3=None): ...@@ -24,10 +28,14 @@ def make_tensor(batch_size, d1=None, d2=None, d3=None):
def get_filter_output_shape(i_h, i_w, params, round_func): def get_filter_output_shape(i_h, i_w, params, round_func):
o_h = (i_h + 2 * params.pad_h - params.kernel_h dila_h = getattr(params, 'dila_h', 1)
) / float(params.stride_h) + 1 dila_w = getattr(params, 'dila_w', 1)
o_w = (i_w + 2 * params.pad_w - params.kernel_w
) / float(params.stride_w) + 1 o_h = (i_h + 2 * params.pad_h -
(dila_h * (params.kernel_h - 1) + 1)) / float(params.stride_h) + 1
o_w = (i_w + 2 * params.pad_w -
(dila_w * (params.kernel_w - 1) + 1)) / float(params.stride_w) + 1
return (int(round_func(o_h)), int(round_func(o_w))) return (int(round_func(o_h)), int(round_func(o_w)))
...@@ -97,6 +105,34 @@ def shape_convolution(node): ...@@ -97,6 +105,34 @@ def shape_convolution(node):
return get_strided_kernel_output_shape(node, math.floor) return get_strided_kernel_output_shape(node, math.floor)
def shape_deconvolution(node):
    '''Compute the output shape of a deconvolution node.

    Each spatial size follows
    ``(in - 1) * stride - 2 * pad + dilation * (kernel - 1) + 1``.
    The channel count is the layer's ``num_output`` when that attribute
    exists and the parent's channel count otherwise.
    '''
    assert node.layer is not None
    in_shape = node.get_only_parent().output_shape
    kernel = node.layer.kernel_parameters

    def expanded(size, stride, pad, dilation, kernel_size):
        # Size of one spatial dimension after the transposed convolution.
        return (size - 1) * stride - 2 * pad + dilation * (kernel_size - 1) + 1

    out_h = expanded(in_shape.height, kernel.stride_h, kernel.pad_h,
                     kernel.dila_h, kernel.kernel_h)
    out_w = expanded(in_shape.width, kernel.stride_w, kernel.pad_w,
                     kernel.dila_w, kernel.kernel_w)

    layer_params = node.layer.parameters
    if hasattr(layer_params, 'num_output'):
        channels = layer_params.num_output
    else:
        channels = in_shape.channels
    return make_tensor(in_shape.batch_size, channels, out_h, out_w)
def shape_pool(node): def shape_pool(node):
global_pool = getattr(node.layer.parameters, 'global_pooling', False) global_pool = getattr(node.layer.parameters, 'global_pooling', False)
if global_pool: if global_pool:
......
...@@ -325,7 +325,8 @@ class ParameterNamer(object): ...@@ -325,7 +325,8 @@ class ParameterNamer(object):
for node in graph.nodes: for node in graph.nodes:
if node.data is None: if node.data is None:
continue continue
if node.kind in (NodeKind.Convolution, NodeKind.InnerProduct): if node.kind in (NodeKind.Convolution, NodeKind.InnerProduct,\
NodeKind.Deconvolution):
names = ('weights', ) names = ('weights', )
if node.parameters.bias_term: if node.parameters.bias_term:
names += ('biases', ) names += ('biases', )
...@@ -337,6 +338,8 @@ class ParameterNamer(object): ...@@ -337,6 +338,8 @@ class ParameterNamer(object):
names = ('scale', ) names = ('scale', )
if getattr(node.parameters, 'bias_term', False): if getattr(node.parameters, 'bias_term', False):
names = ('scale', 'offset') names = ('scale', 'offset')
elif node.kind == "Normalize":
names = ('scale', )
else: else:
warn('Unhandled parameters when naming this it[%s]' % warn('Unhandled parameters when naming this it[%s]' %
(node.kind)) (node.kind))
......
...@@ -34,7 +34,7 @@ tar xf ${valid_tar} -C ${valid_folder} ...@@ -34,7 +34,7 @@ tar xf ${valid_tar} -C ${valid_folder}
echo "Download imagenet label file: val_list.txt & train_list.txt" echo "Download imagenet label file: val_list.txt & train_list.txt"
label_file=ImageNet_label.tgz label_file=ImageNet_label.tgz
label_url=http://imagenet-data.bj.bcebos.com/${label_file} label_url=http://paddle-imagenet-models.bj.bcebos.com/${label_file}
wget -nd -c ${label_url} wget -nd -c ${label_url}
tar zxf ${label_file} tar zxf ${label_file}
...@@ -160,5 +160,5 @@ def val(file_list=TEST_LIST): ...@@ -160,5 +160,5 @@ def val(file_list=TEST_LIST):
return _reader_creator(file_list, 'val', shuffle=False) return _reader_creator(file_list, 'val', shuffle=False)
def test(file_list): def test(file_list=TEST_LIST):
return _reader_creator(file_list, 'test', shuffle=False) return _reader_creator(file_list, 'test', shuffle=False)
...@@ -157,7 +157,8 @@ def train(args): ...@@ -157,7 +157,8 @@ def train(args):
test_reader = paddle.batch(reader.val(), batch_size=test_batch_size) test_reader = paddle.batch(reader.val(), batch_size=test_batch_size)
feeder = fluid.DataFeeder(place=place, feed_list=[image, label]) feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name) train_exe = fluid.ParallelExecutor(
use_cuda=True if args.use_gpu else False, loss_name=avg_cost.name)
fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name] fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name]
......
#!/bin/bash
# Continuous-evaluation (CE) runner: trains once with a single card visible
# and once with several cards visible, piping each training log into _ce.py.
# Limit MKL and OpenMP to one thread each.
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
# Single-card run. ${var:=default} keeps a value that is already set and
# otherwise assigns the default.
cudaid=${language_model:=0} # use 0-th card as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce | python _ce.py
# Multi-card run.
cudaid=${language_model_m:=0,1,2,3} # use 0,1,2,3 card as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce | python _ce.py
# this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi
# KPI trackers for the single-card run. log_to_ce looks trackers up by name,
# so these names must match the KPI names found in the training log.
# NOTE(review): the positional numbers are passed straight to the kpi module;
# presumably a tolerance and a count of leading records to skip - confirm
# against kpi.py.
imikolov_20_avg_ppl_kpi = CostKpi('imikolov_20_avg_ppl', 0.2, 0)
imikolov_20_pass_duration_kpi = DurationKpi(
'imikolov_20_pass_duration', 0.02, 0, actived=True)
# KPI trackers whose names carry the "_card4" suffix (the 4-card run).
imikolov_20_avg_ppl_kpi_card4 = CostKpi('imikolov_20_avg_ppl_card4', 0.2, 0)
imikolov_20_pass_duration_kpi_card4 = DurationKpi(
'imikolov_20_pass_duration_card4', 0.03, 0, actived=True)
# Every KPI that log_to_ce can record a value for.
tracking_kpis = [
imikolov_20_avg_ppl_kpi,
imikolov_20_pass_duration_kpi,
imikolov_20_avg_ppl_kpi_card4,
imikolov_20_pass_duration_kpi_card4,
]
def parse_log(log):
    '''
    Yield ``(kpi_name, kpi_value)`` pairs found in a training log.

    A KPI line has exactly three whitespace-separated fields: the literal
    marker ``kpis``, the KPI name and a numeric value, for example:
    "
    kpis\ttrain_cost\t1.0
    kpis test_cost 1.0
    "
    Every other line is ignored. The split fields of each line are echoed
    to stdout. ``float`` raises ValueError when the third field of a KPI
    line is not a number.
    '''
    for line in log.split('\n'):
        # Split on any run of whitespace so that tab-separated and
        # space-separated KPI lines are both recognised; splitting on
        # '\t' alone silently dropped space-separated records.
        fs = line.split()
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value
def log_to_ce(log):
    '''Record each KPI parsed from ``log`` on its tracker and persist it.

    Raises KeyError when the log names a KPI that is not in
    ``tracking_kpis``.
    '''
    trackers = {kpi.name: kpi for kpi in tracking_kpis}
    for name, value in parse_log(log):
        print(name, value)
        tracker = trackers[name]
        tracker.add_record(value)
        tracker.persist()
if __name__ == '__main__':
    # The whole training log is read from stdin before any KPI is recorded.
    log_to_ce(sys.stdin.read())
import os
import sys import sys
import time import time
import numpy as np import numpy as np
import math import math
import argparse
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.v2 as paddle import paddle
import utils import utils
SEED = 102
def parse_args():
    """Parse the command-line options of the language-model benchmark.

    The only option is ``--enable_ce``, a switch that defaults to False.
    """
    parser = argparse.ArgumentParser("language_model benchmark.")
    ce_help = ('If set, run '
               'the task with continuous evaluation logs.')
    parser.add_argument('--enable_ce', action='store_true', help=ce_help)
    return parser.parse_args()
def network(src, dst, vocab_size, hid_size, init_low_bound, init_high_bound): def network(src, dst, vocab_size, hid_size, init_low_bound, init_high_bound):
""" network definition """ """ network definition """
...@@ -63,31 +77,26 @@ def train(train_reader, ...@@ -63,31 +77,26 @@ def train(train_reader,
init_low_bound=-0.04, init_low_bound=-0.04,
init_high_bound=0.04): init_high_bound=0.04):
""" train network """ """ train network """
args = parse_args()
if args.enable_ce:
# random seed must set before configuring the network.
fluid.default_startup_program().random_seed = SEED
vocab_size = len(vocab) vocab_size = len(vocab)
#Input data
src_wordseq = fluid.layers.data( src_wordseq = fluid.layers.data(
name="src_wordseq", shape=[1], dtype="int64", lod_level=1) name="src_wordseq", shape=[1], dtype="int64", lod_level=1)
dst_wordseq = fluid.layers.data( dst_wordseq = fluid.layers.data(
name="dst_wordseq", shape=[1], dtype="int64", lod_level=1) name="dst_wordseq", shape=[1], dtype="int64", lod_level=1)
# Train program
avg_cost = None avg_cost = None
if not parallel: cost = network(src_wordseq, dst_wordseq, vocab_size, hid_size,
cost = network(src_wordseq, dst_wordseq, vocab_size, hid_size, init_low_bound, init_high_bound)
init_low_bound, init_high_bound) avg_cost = fluid.layers.mean(x=cost)
avg_cost = fluid.layers.mean(x=cost)
else:
places = fluid.layers.get_places()
pd = fluid.layers.ParallelDo(places)
with pd.do():
cost = network(
pd.read_input(src_wordseq),
pd.read_input(dst_wordseq), vocab_size, hid_size,
init_low_bound, init_high_bound)
pd.write_output(cost)
cost = pd()
avg_cost = fluid.layers.mean(x=cost)
# Optimization to minimize lost
sgd_optimizer = fluid.optimizer.SGD( sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.exponential_decay( learning_rate=fluid.layers.exponential_decay(
learning_rate=base_lr, learning_rate=base_lr,
...@@ -96,39 +105,56 @@ def train(train_reader, ...@@ -96,39 +105,56 @@ def train(train_reader,
staircase=True)) staircase=True))
sgd_optimizer.minimize(avg_cost) sgd_optimizer.minimize(avg_cost)
# Initialize executor
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
total_time = 0.0 total_time = 0.0
fetch_list = [avg_cost.name]
for pass_idx in xrange(pass_num): for pass_idx in xrange(pass_num):
epoch_idx = pass_idx + 1 epoch_idx = pass_idx + 1
print "epoch_%d start" % epoch_idx print "epoch_%d start" % epoch_idx
t0 = time.time() t0 = time.time()
i = 0 i = 0
newest_ppl = 0
for data in train_reader(): for data in train_reader():
i += 1 i += 1
lod_src_wordseq = utils.to_lodtensor( lod_src_wordseq = utils.to_lodtensor(
map(lambda x: x[0], data), place) map(lambda x: x[0], data), place)
lod_dst_wordseq = utils.to_lodtensor( lod_dst_wordseq = utils.to_lodtensor(
map(lambda x: x[1], data), place) map(lambda x: x[1], data), place)
ret_avg_cost = exe.run(fluid.default_main_program(), ret_avg_cost = train_exe.run(feed={
feed={ "src_wordseq": lod_src_wordseq,
"src_wordseq": lod_src_wordseq, "dst_wordseq": lod_dst_wordseq
"dst_wordseq": lod_dst_wordseq },
}, fetch_list=fetch_list)
fetch_list=[avg_cost], avg_ppl = np.exp(ret_avg_cost[0])
use_program_cache=True) newest_ppl = np.mean(avg_ppl)
avg_ppl = math.exp(ret_avg_cost[0])
if i % 100 == 0: if i % 100 == 0:
print "step:%d ppl:%.3f" % (i, avg_ppl) print "step:%d ppl:%.3f" % (i, newest_ppl)
t1 = time.time() t1 = time.time()
total_time += t1 - t0 total_time += t1 - t0
print "epoch:%d num_steps:%d time_cost(s):%f" % (epoch_idx, i, print "epoch:%d num_steps:%d time_cost(s):%f" % (epoch_idx, i,
total_time / epoch_idx) total_time / epoch_idx)
if pass_idx == pass_num - 1 and args.enable_ce:
#Note: The following logs are special for CE monitoring.
#Other situations do not need to care about these logs.
gpu_num = get_cards()
if gpu_num == 1:
print("kpis imikolov_20_pass_duration %s" %
(total_time / epoch_idx))
print("kpis imikolov_20_avg_ppl %s" % newest_ppl)
else:
print("kpis imikolov_20_pass_duration_card%s %s" % \
(gpu_num, total_time / epoch_idx))
print("kpis imikolov_20_avg_ppl_card%s %s" %
(gpu_num, newest_ppl))
save_dir = "%s/epoch_%d" % (model_dir, epoch_idx) save_dir = "%s/epoch_%d" % (model_dir, epoch_idx)
feed_var_names = ["src_wordseq", "dst_wordseq"] feed_var_names = ["src_wordseq", "dst_wordseq"]
fetch_vars = [avg_cost] fetch_vars = [avg_cost]
...@@ -138,11 +164,22 @@ def train(train_reader, ...@@ -138,11 +164,22 @@ def train(train_reader,
print("finish training") print("finish training")
def get_cards(enable_ce=True):
    """Return the number of GPU cards used by the run.

    Args:
        enable_ce: when true, count the comma-separated entries of the
            CUDA_VISIBLE_DEVICES environment variable. Defaults to True
            because the argument-less call in ``train`` is only made on the
            continuous-evaluation path; without a default that call raised
            TypeError.

    Returns:
        The number of entries in CUDA_VISIBLE_DEVICES when ``enable_ce`` is
        true and the variable is set; otherwise the device count reported by
        ``fluid.core.get_cuda_device_count()``.
    """
    if enable_ce:
        cards = os.environ.get('CUDA_VISIBLE_DEVICES')
        # An unset variable used to fail on ``None.split``; fall through to
        # the device count reported by fluid instead.
        if cards is not None:
            return len(cards.split(","))
    return fluid.core.get_cuda_device_count()
def train_net(): def train_net():
""" do training """ """ do training """
batch_size = 20 batch_size = 20
args = parse_args()
vocab, train_reader, test_reader = utils.prepare_data( vocab, train_reader, test_reader = utils.prepare_data(
batch_size=batch_size, buffer_size=1000, word_freq_threshold=0) batch_size=batch_size * get_cards(args.enable_ce), buffer_size=1000, \
word_freq_threshold=0, enable_ce = args.enable_ce)
train( train(
train_reader=train_reader, train_reader=train_reader,
vocab=vocab, vocab=vocab,
...@@ -152,7 +189,7 @@ def train_net(): ...@@ -152,7 +189,7 @@ def train_net():
batch_size=batch_size, batch_size=batch_size,
pass_num=12, pass_num=12,
use_cuda=True, use_cuda=True,
parallel=False, parallel=True,
model_dir="model", model_dir="model",
init_low_bound=-0.1, init_low_bound=-0.1,
init_high_bound=0.1) init_high_bound=0.1)
......
...@@ -3,7 +3,7 @@ import time ...@@ -3,7 +3,7 @@ import time
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.v2 as paddle import paddle
def to_lodtensor(data, place): def to_lodtensor(data, place):
...@@ -22,17 +22,28 @@ def to_lodtensor(data, place): ...@@ -22,17 +22,28 @@ def to_lodtensor(data, place):
return res return res
def prepare_data(batch_size, buffer_size=1000, word_freq_threshold=0): def prepare_data(batch_size,
buffer_size=1000,
word_freq_threshold=0,
enable_ce=False):
""" prepare the English Pann Treebank (PTB) data """ """ prepare the English Pann Treebank (PTB) data """
vocab = paddle.dataset.imikolov.build_dict(word_freq_threshold) vocab = paddle.dataset.imikolov.build_dict(word_freq_threshold)
train_reader = paddle.batch( if enable_ce:
paddle.reader.shuffle( train_reader = paddle.batch(
paddle.dataset.imikolov.train( paddle.dataset.imikolov.train(
vocab, vocab,
buffer_size, buffer_size,
data_type=paddle.dataset.imikolov.DataType.SEQ), data_type=paddle.dataset.imikolov.DataType.SEQ),
buf_size=buffer_size), batch_size)
batch_size) else:
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.imikolov.train(
vocab,
buffer_size,
data_type=paddle.dataset.imikolov.DataType.SEQ),
buf_size=buffer_size),
batch_size)
test_reader = paddle.batch( test_reader = paddle.batch(
paddle.dataset.imikolov.test( paddle.dataset.imikolov.test(
vocab, buffer_size, data_type=paddle.dataset.imikolov.DataType.SEQ), vocab, buffer_size, data_type=paddle.dataset.imikolov.DataType.SEQ),
......
#!/bin/bash
# This file is only used for continuous evaluation.
# Remove any *_factor.txt files in the current directory (presumably KPI
# records left by a previous run - confirm against the kpi module).
rm -rf *_factor.txt
model_file='model.py'
# Train for 5 passes on CPU with batch size 128 and pipe the log into _ce.py.
python $model_file --batch_size 128 --pass_num 5 --device CPU | python _ce.py
# this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
# NOTE: kpi.py should be shared among the models in some way.
# KPI trackers; log_to_ce looks them up by name, so the names must match the
# KPI names found in the training log.
# NOTE(review): the numeric argument is passed straight to the kpi module;
# presumably a tolerance - confirm against kpi.py.
train_cost_kpi = CostKpi('train_cost', 0.02, actived=True)
test_acc_kpi = AccKpi('test_acc', 0.005, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.06, actived=True)
train_acc_kpi = AccKpi('train_acc', 0.005, actived=True)
# Every KPI that log_to_ce can record a value for.
tracking_kpis = [
train_acc_kpi,
train_cost_kpi,
test_acc_kpi,
train_duration_kpi,
]
def parse_log(log):
    '''
    Yield ``(kpi_name, kpi_value)`` pairs found in a training log.

    A KPI line has exactly three whitespace-separated fields: the literal
    marker ``kpis``, the KPI name and a numeric value, for example:
    "
    kpis\ttrain_cost\t1.0
    kpis test_acc 0.98
    "
    Every other line is ignored. The split fields of each line are echoed
    to stdout. ``float`` raises ValueError when the third field of a KPI
    line is not a number.
    '''
    for line in log.split('\n'):
        # Split on any run of whitespace so that tab-separated and
        # space-separated KPI lines are both recognised; splitting on
        # '\t' alone silently dropped space-separated records.
        fs = line.split()
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value
def log_to_ce(log):
    '''Record each KPI parsed from ``log`` on its tracker and persist it.

    Raises KeyError when the log names a KPI that is not in
    ``tracking_kpis``.
    '''
    trackers = {kpi.name: kpi for kpi in tracking_kpis}
    for name, value in parse_log(log):
        print(name, value)
        tracker = trackers[name]
        tracker.add_record(value)
        tracker.persist()
if __name__ == '__main__':
    # The whole training log is read from stdin before any KPI is recorded.
    log_to_ce(sys.stdin.read())
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import argparse
import time
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
# Seed assigned to the default startup program below.
SEED = 90
# dtype of the image input layer and of the image batches fed to it.
DTYPE = "float32"
# The random seed must be set before the network is configured.
fluid.default_startup_program().random_seed = SEED
def parse_args():
    """Parse the command-line options of the MNIST benchmark.

    Returns the argparse namespace with ``batch_size``, ``iterations``,
    ``pass_num``, ``device``, ``infer_only``, ``use_cprof`` and
    ``use_nvprof``.
    """
    parser = argparse.ArgumentParser("mnist model benchmark.")

    # Integer options: (flag, default, help text).
    int_options = (
        ('--batch_size', 128, 'The minibatch size.'),
        ('--iterations', 35, 'The number of minibatches.'),
        ('--pass_num', 5, 'The number of passes.'),
    )
    for flag, default, text in int_options:
        parser.add_argument(flag, type=int, default=default, help=text)

    parser.add_argument(
        '--device',
        type=str,
        default='GPU',
        choices=['CPU', 'GPU'],
        help='The device type.')

    # Boolean switches: (flag, help text).
    switches = (
        ('--infer_only', 'If set, run forward only.'),
        ('--use_cprof', 'If set, use cProfile.'),
        ('--use_nvprof', 'If set, use nvprof for CUDA.'),
    )
    for flag, text in switches:
        parser.add_argument(flag, action='store_true', help=text)

    return parser.parse_args()
def print_arguments(args):
    """Normalise ``args.use_nvprof`` and print every argument, sorted by name.

    ``use_nvprof`` is only kept when the selected device is 'GPU'; the
    namespace is modified in place.
    """
    vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and
                                vars(args)['device'] == 'GPU')
    print('----------- Configuration Arguments -----------')
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')
def cnn_model(data):
    """Build the CNN classifier and return its softmax prediction.

    The network is two ``simple_img_conv_pool`` stages (5x5 filters, 20 and
    then 50 filters, 2x2 pooling with stride 2, ReLU) followed by a
    10-way fully connected softmax layer. The fc weights are initialised
    from a normal distribution whose scale is derived from the flattened
    size of the second stage's output.
    """
    # The bare ``reduce`` builtin only exists on Python 2; functools.reduce
    # is available on both Python 2 and 3.
    from functools import reduce

    conv_pool_1 = fluid.nets.simple_img_conv_pool(
        input=data,
        filter_size=5,
        num_filters=20,
        pool_size=2,
        pool_stride=2,
        act="relu")
    conv_pool_2 = fluid.nets.simple_img_conv_pool(
        input=conv_pool_1,
        filter_size=5,
        num_filters=50,
        pool_size=2,
        pool_stride=2,
        act="relu")

    # TODO(dzhwinter) : refine the initializer and random seed setting
    SIZE = 10
    input_shape = conv_pool_2.shape
    # Product of every dimension after the first one, paired with SIZE.
    param_shape = [reduce(lambda a, b: a * b, input_shape[1:], 1)] + [SIZE]
    scale = (2.0 / (param_shape[0]**2 * SIZE))**0.5

    predict = fluid.layers.fc(
        input=conv_pool_2,
        size=SIZE,
        act="softmax",
        param_attr=fluid.param_attr.ParamAttr(
            initializer=fluid.initializer.NormalInitializer(
                loc=0.0, scale=scale)))
    return predict
def eval_test(exe, batch_acc, batch_size_tensor, inference_program):
    """Run the inference program over the MNIST test set.

    Each batch's accuracy is weighted by the fetched batch size, and the
    weighted average over all batches is returned.

    NOTE: the batch size is read from the module-level ``args`` that the
    ``__main__`` block sets; it is not a parameter of this function.
    """
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=args.batch_size)
    test_pass_acc = fluid.average.WeightedAverage()
    for batch_id, data in enumerate(test_reader()):
        # List comprehensions instead of map(): on Python 3 map() returns
        # an iterator, which np.array would not expand into a batch.
        img_data = np.array(
            [x[0].reshape([1, 28, 28]) for x in data]).astype(DTYPE)
        y_data = np.array([x[1] for x in data]).astype("int64")
        y_data = y_data.reshape([len(y_data), 1])

        acc, weight = exe.run(inference_program,
                              feed={"pixel": img_data,
                                    "label": y_data},
                              fetch_list=[batch_acc, batch_size_tensor])
        test_pass_acc.add(value=acc, weight=weight)
    # Evaluate once, after every batch has been accumulated.
    return test_pass_acc.eval()
def run_benchmark(model, args):
    """Train ``model`` on MNIST and print per-pass metrics.

    Args:
        model: callable that takes the image input layer and returns the
            prediction variable (``cnn_model`` in this file).
        args: parsed command-line arguments; ``use_cprof``, ``device``,
            ``batch_size`` and ``pass_num`` are read here.

    For every pass the training accuracy, training loss, test accuracy and
    elapsed time are printed, followed by tab-separated ``kpis`` lines for
    the continuous-evaluation tooling.
    """
    if args.use_cprof:
        # cProfile is not imported at module level, so import it here;
        # the bare name raised NameError before.
        import cProfile
        pr = cProfile.Profile()
        pr.enable()

    # Input data
    images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE)
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Train program
    predict = model(images)
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Evaluator
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=predict, label=label, total=batch_size_tensor)

    # inference program: cloned before the optimizer is added below
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    # Optimization
    opt = fluid.optimizer.AdamOptimizer(
        learning_rate=0.001, beta1=0.9, beta2=0.999)
    opt.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())

    # Initialize executor
    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)

    # Parameter initialization
    exe.run(fluid.default_startup_program())

    # Reader
    train_reader = paddle.batch(
        paddle.dataset.mnist.train(), batch_size=args.batch_size)

    accuracy = fluid.average.WeightedAverage()
    for pass_id in range(args.pass_num):
        accuracy.reset()
        pass_start = time.time()
        every_pass_loss = []
        for batch_id, data in enumerate(train_reader()):
            # List comprehensions instead of map(): on Python 3 map()
            # returns an iterator, which np.array would not expand.
            img_data = np.array(
                [x[0].reshape([1, 28, 28]) for x in data]).astype(DTYPE)
            y_data = np.array([x[1] for x in data]).astype("int64")
            y_data = y_data.reshape([len(y_data), 1])

            start = time.time()
            loss, acc, weight = exe.run(
                fluid.default_main_program(),
                feed={"pixel": img_data,
                      "label": y_data},
                fetch_list=[avg_cost, batch_acc, batch_size_tensor]
            )  # The accuracy is the accumulation of batches, but not the current batch.
            end = time.time()
            accuracy.add(value=acc, weight=weight)
            every_pass_loss.append(loss)
            print("Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
                  (pass_id, batch_id, loss, acc))

        pass_end = time.time()

        train_avg_acc = accuracy.eval()
        train_avg_loss = np.mean(every_pass_loss)
        test_avg_acc = eval_test(exe, batch_acc, batch_size_tensor,
                                 inference_program)

        print(
            "pass=%d, train_avg_acc=%f,train_avg_loss=%f, test_avg_acc=%f, elapse=%f"
            % (pass_id, train_avg_acc, train_avg_loss, test_avg_acc,
               (pass_end - pass_start)))
        # Note: The following logs are special for CE monitoring.
        # Other situations do not need to care about these logs.
        # The fields are tab-separated because _ce.py splits KPI lines
        # on '\t'.
        print("kpis\ttrain_acc\t%f" % train_avg_acc)
        print("kpis\ttrain_cost\t%f" % train_avg_loss)
        print("kpis\ttest_acc\t%f" % test_avg_acc)
        print("kpis\ttrain_duration\t%f" % (pass_end - pass_start))

    if args.use_cprof:
        # Stop the profiler and report; it was enabled above but its
        # results were never emitted.
        pr.disable()
        pr.print_stats(sort='cumulative')
if __name__ == '__main__':
    args = parse_args()
    print_arguments(args)
    # The CUDA profiler only wraps the run when nvprof was requested on GPU.
    profile_cuda = args.use_nvprof and args.device == 'GPU'
    if not profile_cuda:
        run_benchmark(cnn_model, args)
    else:
        with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
            run_benchmark(cnn_model, args)
###!/bin/bash
####This file is only used for continuous evaluation.
# NOTE(review): the first line starts with '###', so it is an ordinary
# comment rather than a shebang.
model_file='train.py'
# Train for one pass with CE logging enabled and pipe the log into _ce.py.
python $model_file --pass_num 1 --learning_rate 0.001 --save_interval 10 --enable_ce | python _ce.py
####this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE: kpi.py should be shared among the models in some way.
# KPI trackers; log_to_ce looks them up by name, so the names must match the
# KPI names found in the training log.
# NOTE(review): the positional numbers are passed straight to the kpi module;
# presumably a tolerance and a count of leading records to skip - confirm
# against kpi.py.
train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True)
test_cost_kpi = CostKpi('test_cost', 0.005, 0, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.06, 0, actived=True)
# Every KPI that log_to_ce can record a value for.
tracking_kpis = [
train_cost_kpi,
test_cost_kpi,
train_duration_kpi,
]
def parse_log(log):
    '''
    Yield ``(kpi_name, kpi_value)`` pairs found in a training log.

    A KPI line has exactly three whitespace-separated fields: the literal
    marker ``kpis``, the KPI name and a numeric value, for example:
    "
    kpis\ttrain_cost\t1.0
    kpis test_cost 1.0
    "
    Every other line is ignored. The split fields of each line are echoed
    to stdout, and matched KPI lines are echoed a second time with a
    "-----" prefix. ``float`` raises ValueError when the third field of a
    KPI line is not a number.
    '''
    for line in log.split('\n'):
        # Split on any run of whitespace so that tab-separated and
        # space-separated KPI lines are both recognised; splitting on
        # '\t' alone silently dropped space-separated records.
        fs = line.split()
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            print("-----%s" % fs)
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value
def log_to_ce(log):
    '''Record each KPI parsed from ``log`` on its tracker and persist it.

    Raises KeyError when the log names a KPI that is not in
    ``tracking_kpis``.
    '''
    trackers = {kpi.name: kpi for kpi in tracking_kpis}
    for name, value in parse_log(log):
        print(name, value)
        tracker = trackers[name]
        tracker.add_record(value)
        tracker.persist()
if __name__ == '__main__':
    # Read the whole training log from stdin, echo it between two marker
    # lines, then record its KPIs.
    log = sys.stdin.read()
    for echoed in ("*****", log, "****"):
        print(echoed)
    log_to_ce(log)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import distutils.util
def parse_args():
    """Parse the command-line options of the seq2seq training script.

    The options are declared in a table and registered in table order, so
    the generated help lists them in that order. Returns the argparse
    namespace.
    """
    option_table = [
        ("--embedding_dim", dict(
            type=int,
            default=512,
            help="The dimension of embedding table. (default: %(default)d)")),
        ("--encoder_size", dict(
            type=int,
            default=512,
            help="The size of encoder bi-rnn unit. (default: %(default)d)")),
        ("--decoder_size", dict(
            type=int,
            default=512,
            help="The size of decoder rnn unit. (default: %(default)d)")),
        ("--batch_size", dict(
            type=int,
            default=32,
            help="The sequence number of a mini-batch data. "
            "(default: %(default)d)")),
        ("--dict_size", dict(
            type=int,
            default=30000,
            help="The dictionary capacity. Dictionaries of source sequence "
            "and target dictionary have same capacity. "
            "(default: %(default)d)")),
        ("--pass_num", dict(
            type=int,
            default=5,
            help="The pass number to train. (default: %(default)d)")),
        ("--learning_rate", dict(
            type=float,
            default=0.01,
            help="Learning rate used to train the model. "
            "(default: %(default)f)")),
        ("--no_attention", dict(
            action='store_true',
            help="If set, run no attention model instead of attention "
            "model.")),
        ("--beam_size", dict(
            type=int,
            default=3,
            help="The width for beam searching. (default: %(default)d)")),
        ("--use_gpu", dict(
            type=distutils.util.strtobool,
            default=True,
            help="Whether to use gpu. (default: %(default)d)")),
        ("--max_length", dict(
            type=int,
            default=50,
            help="The maximum length of sequence when doing generation. "
            "(default: %(default)d)")),
        ("--save_dir", dict(
            type=str,
            default="model",
            help="Specify the path to save trained models.")),
        ("--save_interval", dict(
            type=int,
            default=1,
            help="Save the trained model every n passes."
            "(default: %(default)d)")),
        ("--enable_ce", dict(
            action='store_true',
            help="If set, run the task with continuous evaluation logs.")),
    ]

    parser = argparse.ArgumentParser(description=__doc__)
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)
    return parser.parse_args()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.contrib.decoder.beam_search_decoder import *
def lstm_step(x_t, hidden_t_prev, cell_t_prev, size):
    """Compute one LSTM step and return ``(hidden_t, cell_t)``.

    The forget, input and output gates are sigmoids, and the candidate cell
    is a tanh, each of its own fc projection of ``[hidden_t_prev, x_t]``
    with ``size`` units. The new cell state is
    ``forget * cell_t_prev + input * candidate`` and the new hidden state is
    ``output * tanh(cell_t)``.
    """

    def project():
        # One fc call per gate, always over the same pair of inputs.
        return fluid.layers.fc(
            input=[hidden_t_prev, x_t], size=size, bias_attr=True)

    forget_gate = fluid.layers.sigmoid(x=project())
    input_gate = fluid.layers.sigmoid(x=project())
    output_gate = fluid.layers.sigmoid(x=project())
    cell_tilde = fluid.layers.tanh(x=project())

    kept = fluid.layers.elementwise_mul(x=forget_gate, y=cell_t_prev)
    written = fluid.layers.elementwise_mul(x=input_gate, y=cell_tilde)
    cell_t = fluid.layers.sums(input=[kept, written])

    squashed_cell = fluid.layers.tanh(x=cell_t)
    hidden_t = fluid.layers.elementwise_mul(x=output_gate, y=squashed_cell)
    return hidden_t, cell_t
def seq_to_seq_net(embedding_dim, encoder_size, decoder_size, source_dict_dim,
target_dict_dim, is_generating, beam_size, max_length):
"""Construct a seq2seq network."""
def bi_lstm_encoder(input_seq, gate_size):
# A bi-directional lstm encoder implementation.
# Linear transformation part for input gate, output gate, forget gate
# and cell activation vectors need be done outside of dynamic_lstm.
# So the output size is 4 times of gate_size.
input_forward_proj = fluid.layers.fc(input=input_seq,
size=gate_size * 4,
act='tanh',
bias_attr=False)
forward, _ = fluid.layers.dynamic_lstm(
input=input_forward_proj, size=gate_size * 4, use_peepholes=False)
input_reversed_proj = fluid.layers.fc(input=input_seq,
size=gate_size * 4,
act='tanh',
bias_attr=False)
reversed, _ = fluid.layers.dynamic_lstm(
input=input_reversed_proj,
size=gate_size * 4,
is_reverse=True,
use_peepholes=False)
return forward, reversed
# The encoding process. Encodes the input words into tensors.
src_word_idx = fluid.layers.data(
name='source_sequence', shape=[1], dtype='int64', lod_level=1)
src_embedding = fluid.layers.embedding(
input=src_word_idx,
size=[source_dict_dim, embedding_dim],
dtype='float32')
src_forward, src_reversed = bi_lstm_encoder(
input_seq=src_embedding, gate_size=encoder_size)
encoded_vector = fluid.layers.concat(
input=[src_forward, src_reversed], axis=1)
encoded_proj = fluid.layers.fc(input=encoded_vector,
size=decoder_size,
bias_attr=False)
backward_first = fluid.layers.sequence_pool(
input=src_reversed, pool_type='first')
decoder_boot = fluid.layers.fc(input=backward_first,
size=decoder_size,
bias_attr=False,
act='tanh')
cell_init = fluid.layers.fill_constant_batch_size_like(
input=decoder_boot,
value=0.0,
shape=[-1, decoder_size],
dtype='float32')
cell_init.stop_gradient = False
# Create a RNN state cell by providing the input and hidden states, and
# specifies the hidden state as output.
h = InitState(init=decoder_boot, need_reorder=True)
c = InitState(init=cell_init)
state_cell = StateCell(
inputs={'x': None,
'encoder_vec': None,
'encoder_proj': None},
states={'h': h,
'c': c},
out_state='h')
def simple_attention(encoder_vec, encoder_proj, decoder_state):
# The implementation of simple attention model
decoder_state_proj = fluid.layers.fc(input=decoder_state,
size=decoder_size,
bias_attr=False)
decoder_state_expand = fluid.layers.sequence_expand(
x=decoder_state_proj, y=encoder_proj)
# concated lod should inherit from encoder_proj
concated = fluid.layers.concat(
input=[encoder_proj, decoder_state_expand], axis=1)
attention_weights = fluid.layers.fc(input=concated,
size=1,
bias_attr=False)
attention_weights = fluid.layers.sequence_softmax(
input=attention_weights)
weigths_reshape = fluid.layers.reshape(x=attention_weights, shape=[-1])
scaled = fluid.layers.elementwise_mul(
x=encoder_vec, y=weigths_reshape, axis=0)
context = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
return context
@state_cell.state_updater
def state_updater(state_cell):
# Define the updater of RNN state cell
current_word = state_cell.get_input('x')
encoder_vec = state_cell.get_input('encoder_vec')
encoder_proj = state_cell.get_input('encoder_proj')
prev_h = state_cell.get_state('h')
prev_c = state_cell.get_state('c')
context = simple_attention(encoder_vec, encoder_proj, prev_h)
decoder_inputs = fluid.layers.concat(
input=[context, current_word], axis=1)
h, c = lstm_step(decoder_inputs, prev_h, prev_c, decoder_size)
state_cell.set_state('h', h)
state_cell.set_state('c', c)
# Define the decoding process
if not is_generating:
# Training process
trg_word_idx = fluid.layers.data(
name='target_sequence', shape=[1], dtype='int64', lod_level=1)
trg_embedding = fluid.layers.embedding(
input=trg_word_idx,
size=[target_dict_dim, embedding_dim],
dtype='float32')
# A decoder for training
decoder = TrainingDecoder(state_cell)
with decoder.block():
current_word = decoder.step_input(trg_embedding)
encoder_vec = decoder.static_input(encoded_vector)
encoder_proj = decoder.static_input(encoded_proj)
decoder.state_cell.compute_state(inputs={
'x': current_word,
'encoder_vec': encoder_vec,
'encoder_proj': encoder_proj
})
h = decoder.state_cell.get_state('h')
decoder.state_cell.update_states()
out = fluid.layers.fc(input=h,
size=target_dict_dim,
bias_attr=True,
act='softmax')
decoder.output(out)
label = fluid.layers.data(
name='label_sequence', shape=[1], dtype='int64', lod_level=1)
cost = fluid.layers.cross_entropy(input=decoder(), label=label)
avg_cost = fluid.layers.mean(x=cost)
feeding_list = ["source_sequence", "target_sequence", "label_sequence"]
return avg_cost, feeding_list
else:
# Inference
init_ids = fluid.layers.data(
name="init_ids", shape=[1], dtype="int64", lod_level=2)
init_scores = fluid.layers.data(
name="init_scores", shape=[1], dtype="float32", lod_level=2)
# A beam search decoder
decoder = BeamSearchDecoder(
state_cell=state_cell,
init_ids=init_ids,
init_scores=init_scores,
target_dict_dim=target_dict_dim,
word_dim=embedding_dim,
input_var_dict={
'encoder_vec': encoded_vector,
'encoder_proj': encoded_proj
},
topk_size=50,
sparse_emb=True,
max_len=max_length,
beam_size=beam_size,
end_id=1,
name=None)
decoder.decode()
translation_ids, translation_scores = decoder()
feeding_list = ["source_sequence"]
return translation_ids, translation_scores, feeding_list
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import os
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.framework as framework
from paddle.fluid.executor import Executor
from paddle.fluid.contrib.decoder.beam_search_decoder import *
from args import *
import attention_model
import no_attention_model
def infer():
    """Run beam-search inference on the WMT14 test set and print the results.

    Builds the generation network (with or without attention, depending on
    ``args.no_attention``), loads the persistables stored under
    ``<args.save_dir>/<args.pass_num>`` and decodes the test set batch by
    batch.  For every batch the decoded sentences are grouped by the first
    LoD level of the fetched ids (presumably one group of beam candidates per
    source sentence -- confirm against BeamSearchDecoder) and each group is
    printed as a list of strings.
    """
    args = parse_args()

    # Inference network: both model modules expose the same builder signature.
    model = no_attention_model if args.no_attention else attention_model
    translation_ids, translation_scores, feed_order = model.seq_to_seq_net(
        args.embedding_dim,
        args.encoder_size,
        args.decoder_size,
        args.dict_size,
        args.dict_size,
        True,
        beam_size=args.beam_size,
        max_length=args.max_length)

    # NOTE(review): the test reader is shuffled, so the printed order does not
    # follow the order of the test file -- confirm this is intended.
    test_batch_generator = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.wmt14.test(args.dict_size), buf_size=1000),
        batch_size=args.batch_size,
        drop_last=False)

    place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    model_path = os.path.join(args.save_dir, str(args.pass_num))
    fluid.io.load_persistables(
        executor=exe,
        dirname=model_path,
        main_program=framework.default_main_program())

    # Only the target-side dictionary is needed to turn ids back into words.
    _, trg_dict = paddle.dataset.wmt14.get_dict(args.dict_size)

    # Only the first entry of feed_order (the source sequence) is fed here;
    # init_ids / init_scores are added to the feed dict by hand below.
    feed_list = [
        framework.default_main_program().global_block().var(var_name)
        for var_name in feed_order[0:1]
    ]
    feeder = fluid.DataFeeder(feed_list, place)

    for data in test_batch_generator():
        # The value of batch_size may vary in the last batch
        batch_size = len(data)

        # Setup initial ids and scores lod tensor: one id 0 and one score 1.0
        # per sample, each wrapped in a two-level LoD of length-1 sequences.
        init_ids_data = np.zeros((batch_size, 1), dtype='int64')
        init_scores_data = np.ones((batch_size, 1), dtype='float32')
        seq_lens = [1] * batch_size
        init_recursive_seq_lens = [seq_lens, seq_lens]
        init_ids = fluid.create_lod_tensor(init_ids_data,
                                           init_recursive_seq_lens, place)
        init_scores = fluid.create_lod_tensor(init_scores_data,
                                              init_recursive_seq_lens, place)

        # Feed dict for inference.  A concrete list is built (instead of a
        # lazy ``map`` object) so the feeder sees the same input on Python 2
        # and Python 3.
        feed_dict = feeder.feed([[sample[0]] for sample in data])
        feed_dict['init_ids'] = init_ids
        feed_dict['init_scores'] = init_scores

        fetch_outs = exe.run(framework.default_main_program(),
                             feed=feed_dict,
                             fetch_list=[translation_ids, translation_scores],
                             return_numpy=False)

        # Split the output words by lod levels.  ``range`` replaces the
        # Python-2-only ``xrange`` so the script also runs on Python 3.
        lod_level_1 = fetch_outs[0].lod()[1]
        token_array = np.array(fetch_outs[0])
        result = []
        for i in range(len(lod_level_1) - 1):
            sentence_list = [
                trg_dict[token]
                for token in token_array[lod_level_1[i]:lod_level_1[i + 1]]
            ]
            # The first and last tokens of every decoded sequence are dropped
            # (presumably the start/end markers).
            result.append(" ".join(sentence_list[1:-1]))

        lod_level_0 = fetch_outs[0].lod()[0]
        paragraphs = [
            result[lod_level_0[i]:lod_level_0[i + 1]]
            for i in range(len(lod_level_0) - 1)
        ]
        for paragraph in paragraphs:
            print(paragraph)


if __name__ == '__main__':
    infer()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid.layers as layers
from paddle.fluid.contrib.decoder.beam_search_decoder import *
def seq_to_seq_net(embedding_dim, encoder_size, decoder_size, source_dict_dim,
                   target_dict_dim, is_generating, beam_size, max_length):
    """Build the sequence-to-sequence translation network without attention.

    When ``is_generating`` is false the training graph is built and
    ``(avg_cost, feeding_list)`` is returned.  Otherwise a beam-search
    decoding graph is built and ``(translation_ids, translation_scores,
    feeding_list)`` is returned; ``beam_size`` and ``max_length`` are only
    used in that case.

    The layers are created in a fixed order on purpose: do not reorder the
    calls below without checking the effect on auto-generated parameter names.
    """

    def encoder():
        # Source side: embedding -> tanh projection -> dynamic LSTM, keeping
        # only the last step of the LSTM output as the sentence context.
        source_ids = layers.data(
            name="src_word", shape=[1], dtype='int64', lod_level=1)
        source_emb = layers.embedding(
            input=source_ids,
            size=[source_dict_dim, embedding_dim],
            dtype='float32',
            is_sparse=True)
        projected = layers.fc(
            input=source_emb, size=encoder_size * 4, act='tanh')
        hidden_seq, _ = layers.dynamic_lstm(
            input=projected, size=encoder_size * 4)
        return layers.sequence_last_step(input=hidden_seq)

    def decoder_state_cell(context):
        # State cell with a single hidden state 'h', initialised from the
        # encoder context and exposed as the cell output.
        initial_hidden = InitState(init=context, need_reorder=True)
        cell = StateCell(
            inputs={'x': None}, states={'h': initial_hidden}, out_state='h')

        @cell.state_updater
        def updater(step_cell):
            word_emb = step_cell.get_input('x')
            last_hidden = step_cell.get_state('h')
            # The previous hidden state is listed first so that the new state
            # takes its LoD from it (per the original author's note).
            next_hidden = layers.fc(
                input=[last_hidden, word_emb], size=decoder_size, act='tanh')
            step_cell.set_state('h', next_hidden)

        return cell

    def decoder_train(state_cell):
        # Training-time decoder: steps over the embedded target words and
        # emits a softmax distribution over the target dictionary per step.
        target_ids = layers.data(
            name="target_word", shape=[1], dtype='int64', lod_level=1)
        target_emb = layers.embedding(
            input=target_ids,
            size=[target_dict_dim, embedding_dim],
            dtype='float32',
            is_sparse=True)

        train_decoder = TrainingDecoder(state_cell)
        # Computation performed by the decoder at every RNN step
        with train_decoder.block():
            step_word = train_decoder.step_input(target_emb)
            train_decoder.state_cell.compute_state(inputs={'x': step_word})
            step_hidden = train_decoder.state_cell.get_state('h')
            step_score = layers.fc(
                input=step_hidden, size=target_dict_dim, act='softmax')
            train_decoder.state_cell.update_states()
            train_decoder.output(step_score)

        return train_decoder()

    def decoder_infer(state_cell):
        # Inference-time decoder: beam search seeded by externally fed
        # initial ids and scores.
        start_ids = layers.data(
            name="init_ids", shape=[1], dtype="int64", lod_level=2)
        start_scores = layers.data(
            name="init_scores", shape=[1], dtype="float32", lod_level=2)

        beam_decoder = BeamSearchDecoder(
            state_cell=state_cell,
            init_ids=start_ids,
            init_scores=start_scores,
            target_dict_dim=target_dict_dim,
            word_dim=embedding_dim,
            input_var_dict={},
            topk_size=50,
            sparse_emb=True,
            max_len=max_length,
            beam_size=beam_size,
            end_id=1,
            name=None)
        beam_decoder.decode()
        ids, scores = beam_decoder()
        return ids, scores

    cell = decoder_state_cell(encoder())

    if is_generating:
        translation_ids, translation_scores = decoder_infer(cell)
        return translation_ids, translation_scores, ['src_word']

    # Training: cross entropy between the per-step predictions and the
    # next-word labels, averaged into a single cost.
    next_word = layers.data(
        name="target_next_word", shape=[1], dtype='int64', lod_level=1)
    predictions = decoder_train(cell)
    step_cost = layers.cross_entropy(input=predictions, label=next_word)
    avg_cost = layers.mean(x=step_cost)
    return avg_cost, ['src_word', 'target_word', 'target_next_word']
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import time
import os
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.framework as framework
from paddle.fluid.executor import Executor
from paddle.fluid.contrib.decoder.beam_search_decoder import *
from args import *
import attention_model
import no_attention_model
def train():
    """Train the seq2seq translation model on WMT14 and log per-pass metrics.

    Builds the training network (with or without attention, depending on
    ``args.no_attention``), optimises it with Adam plus L2 decay, evaluates
    the average loss on the WMT14 test set after every pass and saves the
    persistables to ``<args.save_dir>/<pass_id>`` every
    ``args.save_interval`` passes.

    When ``args.enable_ce`` is set (continuous evaluation) the startup random
    seed is fixed, readers are not shuffled, each pass stops after batch 100
    and extra ``kpis`` lines are printed.
    """
    args = parse_args()

    if args.enable_ce:
        framework.default_startup_program().random_seed = 111

    # Training process: both model modules expose the same builder signature.
    model = no_attention_model if args.no_attention else attention_model
    avg_cost, feed_order = model.seq_to_seq_net(
        args.embedding_dim,
        args.encoder_size,
        args.decoder_size,
        args.dict_size,
        args.dict_size,
        False,
        beam_size=args.beam_size,
        max_length=args.max_length)

    # Clone the main program before the optimizer adds its ops and use the
    # clone as the validation program.
    main_program = fluid.default_main_program()
    inference_program = main_program.clone()

    optimizer = fluid.optimizer.Adam(
        learning_rate=args.learning_rate,
        regularization=fluid.regularizer.L2DecayRegularizer(
            regularization_coeff=1e-5))
    optimizer.minimize(avg_cost)

    # Shuffling is disabled for Continuous Evaluation only.
    train_reader = paddle.dataset.wmt14.train(args.dict_size)
    test_reader = paddle.dataset.wmt14.test(args.dict_size)
    if not args.enable_ce:
        train_reader = paddle.reader.shuffle(train_reader, buf_size=1000)
        test_reader = paddle.reader.shuffle(test_reader, buf_size=1000)
    train_batch_generator = paddle.batch(
        train_reader, batch_size=args.batch_size, drop_last=False)
    test_batch_generator = paddle.batch(
        test_reader, batch_size=args.batch_size, drop_last=False)

    place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    feed_list = [
        main_program.global_block().var(var_name) for var_name in feed_order
    ]
    feeder = fluid.DataFeeder(feed_list, place)

    def validation():
        """Return the mean batch loss over the test set (NaN if it is empty)."""
        total_loss = 0.0
        count = 0
        val_feed_list = [
            inference_program.global_block().var(var_name)
            for var_name in feed_order
        ]
        val_feeder = fluid.DataFeeder(val_feed_list, place)
        for data in test_batch_generator():
            val_fetch_outs = exe.run(inference_program,
                                     feed=val_feeder.feed(data),
                                     fetch_list=[avg_cost],
                                     return_numpy=False)
            total_loss += np.array(val_fetch_outs[0])[0]
            count += 1
        # An empty test reader would otherwise raise ZeroDivisionError.
        if count == 0:
            return float('nan')
        return total_loss / count

    for pass_id in range(1, args.pass_num + 1):
        pass_start_time = time.time()
        words_seen = 0
        # Defined up front so the logging below still works if the reader
        # yields no batch at all.
        avg_cost_train = float('nan')
        for batch_id, data in enumerate(train_batch_generator()):
            # Each sample is fed in feed_order (source, target, label), so
            # the first two entries are the source and target id sequences.
            # Count their tokens; the previous ``len(data) * 2`` counted
            # sentences, which made the reported words/s meaningless.
            words_seen += sum(
                len(sample[0]) + len(sample[1]) for sample in data)
            fetch_outs = exe.run(main_program,
                                 feed=feeder.feed(data),
                                 fetch_list=[avg_cost])
            # Same scalar extraction as in validation().
            avg_cost_train = np.array(fetch_outs[0])[0]
            print('pass_id=%d, batch_id=%d, train_loss: %f' %
                  (pass_id, batch_id, avg_cost_train))
            # This is for continuous evaluation only
            if args.enable_ce and batch_id >= 100:
                break
        pass_end_time = time.time()

        test_loss = validation()
        time_consumed = pass_end_time - pass_start_time
        # Guard against a zero-length pass (e.g. empty reader, coarse clock).
        words_per_sec = (words_seen / time_consumed
                         if time_consumed > 0 else 0.0)
        print("pass_id=%d, test_loss: %f, words/s: %f, sec/pass: %f" %
              (pass_id, test_loss, words_per_sec, time_consumed))

        # This log is for continuous evaluation only
        if args.enable_ce:
            print("kpis\ttrain_cost\t%f" % avg_cost_train)
            print("kpis\ttest_cost\t%f" % test_loss)
            print("kpis\ttrain_duration\t%f" % time_consumed)

        if pass_id % args.save_interval == 0:
            model_path = os.path.join(args.save_dir, str(pass_id))
            if not os.path.isdir(model_path):
                os.makedirs(model_path)
            fluid.io.save_persistables(
                executor=exe,
                dirname=model_path,
                main_program=main_program)


if __name__ == '__main__':
    train()
...@@ -9,13 +9,14 @@ ...@@ -9,13 +9,14 @@
```text ```text
. .
├── images # README 文档中的图片 ├── images # README 文档中的图片
├── optim.py # learning rate scheduling 计算程序 ├── config.py # 训练、预测以及模型参数配置
├── infer.py # 预测脚本 ├── infer.py # 预测脚本
├── model.py # 模型定义 ├── model.py # 模型定义
├── optim.py # learning rate scheduling 计算程序
├── reader.py # 数据读取接口 ├── reader.py # 数据读取接口
├── README.md # 文档 ├── README.md # 文档
├── train.py # 训练脚本 ├── train.py # 训练脚本
└── config.py # 训练、预测以及模型参数配置 └── util.py # wordpiece 数据解码工具
``` ```
### 简介 ### 简介
...@@ -58,34 +59,43 @@ Decoder 具有和 Encoder 类似的结构,只是相比于组成 Encoder 的 la ...@@ -58,34 +59,43 @@ Decoder 具有和 Encoder 类似的结构,只是相比于组成 Encoder 的 la
### 数据准备 ### 数据准备
我们以 [WMT'16 EN-DE 数据集](http://www.statmt.org/wmt16/translation-task.html)作为示例,同时参照论文中的设置使用 BPE(byte-pair encoding)[4]编码的数据,使用这种方式表示的数据能够更好的解决未登录词(out-of-vocabulary,OOV)的问题。用到的 BPE 数据可以参照[这里](https://github.com/google/seq2seq/blob/master/docs/data.md)进行下载,下载后解压,其中 `train.tok.clean.bpe.32000.en``train.tok.clean.bpe.32000.de` 为使用 BPE 的训练数据(平行语料,分别对应了英语和德语,经过了 tokenize 和 BPE 的处理),`newstest2013.tok.bpe.32000.en``newstest2013.tok.bpe.32000.de` 等为测试数据(`newstest2013.tok.en``newstest2013.tok.de` 等则为对应的未使用 BPE 的测试数据),`vocab.bpe.32000` 为相应的词典文件(源语言和目标语言共享该词典文件)。 WMT 数据集是机器翻译领域公认的主流数据集;WMT 英德和英法数据集也是 Transformer 论文中所用数据集,其中英德数据集使用了 BPE(byte-pair encoding)[4]编码的数据,英法数据集使用了 wordpiece [5]的数据。我们这里也将使用 WMT 英德和英法翻译数据,并和论文保持一致使用 BPE 和 wordpiece 的数据,下面给出了使用的方法。对于其他自定义数据,参照下文遵循或转换为类似的数据格式即可。
#### WMT 英德翻译数据
由于本示例中的数据读取脚本 `reader.py` 使用的样本数据的格式为 `\t` 分隔的的源语言和目标语言句子对(句子中的词之间使用空格分隔), 因此需要将源语言到目标语言的平行语料库文件合并为一个文件,可以执行以下命令进行合并: [WMT'16 EN-DE 数据集](http://www.statmt.org/wmt16/translation-task.html)是一个中等规模的数据集。参照论文,英德数据集我们使用 BPE 编码的数据,这能够更好的解决未登录词(out-of-vocabulary,OOV)的问题[4]。用到的 BPE 数据可以参照[这里](https://github.com/google/seq2seq/blob/master/docs/data.md)进行下载(如果希望在自定义数据中使用 BPE 编码,可以参照[这里](https://github.com/rsennrich/subword-nmt)进行预处理),下载后解压,其中 `train.tok.clean.bpe.32000.en``train.tok.clean.bpe.32000.de` 为使用 BPE 的训练数据(平行语料,分别对应了英语和德语,经过了 tokenize 和 BPE 的处理),`newstest2013.tok.bpe.32000.en``newstest2013.tok.bpe.32000.de` 等为测试数据(`newstest2013.tok.en``newstest2013.tok.de` 等则为对应的未使用 BPE 的测试数据),`vocab.bpe.32000` 为相应的词典文件(源语言和目标语言共享该词典文件)。
由于本示例中的数据读取脚本 `reader.py` 默认使用的样本数据的格式为 `\t` 分隔的源语言和目标语言句子对(默认句子中的词之间使用空格分隔),因此需要将源语言到目标语言的平行语料库文件合并为一个文件,可以执行以下命令进行合并:
```sh ```sh
paste -d '\t' train.tok.clean.bpe.32000.en train.tok.clean.bpe.32000.de > train.tok.clean.bpe.32000.en-de paste -d '\t' train.tok.clean.bpe.32000.en train.tok.clean.bpe.32000.de > train.tok.clean.bpe.32000.en-de
``` ```
此外,下载的词典文件 `vocab.bpe.32000` 中未包含表示序列开始、序列结束和未登录词的特殊符号,可以使用如下命令在词典中加入 `<s>``<e>``<unk>` 作为这三个特殊符号。 此外,下载的词典文件 `vocab.bpe.32000` 中未包含表示序列开始、序列结束和未登录词的特殊符号,可以使用如下命令在词典中加入 `<s>``<e>``<unk>` 作为这三个特殊符号(用 BPE 表示数据已有效避免了未登录词的问题,这里加入只是做通用处理)
```sh ```sh
sed -i '1i\<s>\n<e>\n<unk>' vocab.bpe.32000 sed -i '1i\<s>\n<e>\n<unk>' vocab.bpe.32000
``` ```
对于其他自定义数据,遵循或转换为上述的数据格式即可。如果希望在自定义数据中使用 BPE 编码,可以参照[这里](https://github.com/rsennrich/subword-nmt)进行预处理。 #### WMT 英法翻译数据
[WMT'14 EN-FR 数据集](http://www.statmt.org/wmt14/translation-task.html)是一个较大规模的数据集。参照论文,英法数据我们使用 wordpiece 表示的数据,wordpiece 和 BPE 类似同为采用 sub-word units 来解决 OOV 问题的方法[5]。我们提供了已完成预处理的 wordpiece 数据的下载,可以从[这里](http://transformer-data.bj.bcebos.com/wmt14_enfr.tar)下载,其中 `train.wordpiece.en-fr` 为使用 wordpiece 的训练数据,`newstest2014.wordpiece.en-fr` 为测试数据(`newstest2014.tok.en``newstest2014.tok.fr` 为对应的未经 wordpiece 处理过的测试数据,使用[脚本](https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl)进行了 tokenize 的处理),`vocab.wordpiece.en-fr` 为相应的词典文件(源语言和目标语言共享该词典文件)。
提供的英法翻译数据无需进行额外的处理,可以直接使用;需要注意的是,这些用 wordpiece 表示的数据中句子内的 token 之间使用 `\x01` 而非空格进行分隔(因部分 token 内包含空格),这需要在训练时进行指定。
### 模型训练 ### 模型训练
`train.py` 是模型训练脚本,可以执行以下命令进行模型训练: `train.py` 是模型训练脚本。以英德翻译数据为例,可以执行以下命令进行模型训练:
```sh ```sh
python -u train.py \ python -u train.py \
--src_vocab_fpath data/vocab.bpe.32000 \ --src_vocab_fpath data/vocab.bpe.32000 \
--trg_vocab_fpath data/vocab.bpe.32000 \ --trg_vocab_fpath data/vocab.bpe.32000 \
--special_token '<s>' '<e>' '<unk>' \ --special_token '<s>' '<e>' '<unk>' \
--train_file_pattern data/train.tok.clean.bpe.32000.en-de \ --train_file_pattern data/train.tok.clean.bpe.32000.en-de \
--token_delimiter ' ' \
--use_token_batch True \ --use_token_batch True \
--batch_size 3200 \ --batch_size 3200 \
--sort_type pool \ --sort_type pool \
--pool_size 200000 \ --pool_size 200000
``` ```
上述命令中设置了源语言词典文件路径(`src_vocab_fpath`)、目标语言词典文件路径(`trg_vocab_fpath`)、训练数据文件(`train_file_pattern`,支持通配符)等数据相关的参数和构造 batch 方式(`use_token_batch`数据按照 token 数目或者 sequence 数目组成 batch)等 reader 相关的参数。有关这些参数更详细的信息可以通过执行以下命令查看: 上述命令中设置了源语言词典文件路径(`src_vocab_fpath`)、目标语言词典文件路径(`trg_vocab_fpath`)、训练数据文件(`train_file_pattern`,支持通配符)等数据相关的参数和构造 batch 方式(`use_token_batch`定了数据按照 token 数目或者 sequence 数目组成 batch)等 reader 相关的参数。有关这些参数更详细的信息可以通过执行以下命令查看:
```sh ```sh
python train.py --help python train.py --help
``` ```
...@@ -98,19 +108,20 @@ python -u train.py \ ...@@ -98,19 +108,20 @@ python -u train.py \
--trg_vocab_fpath data/vocab.bpe.32000 \ --trg_vocab_fpath data/vocab.bpe.32000 \
--special_token '<s>' '<e>' '<unk>' \ --special_token '<s>' '<e>' '<unk>' \
--train_file_pattern data/train.tok.clean.bpe.32000.en-de \ --train_file_pattern data/train.tok.clean.bpe.32000.en-de \
--token_delimiter ' ' \
--use_token_batch True \ --use_token_batch True \
--batch_size 3200 \ --batch_size 3200 \
--sort_type pool \ --sort_type pool \
--pool_size 200000 \ --pool_size 200000 \
n_layer 8 \ n_layer 6 \
n_head 16 \ n_head 16 \
d_model 1024 \ d_model 1024 \
d_inner_hid 4096 \ d_inner_hid 4096 \
dropout 0.3 dropout 0.3
``` ```
有关这些参数更详细信息的还请参考 `config.py` 中的注释说明 有关这些参数更详细信息的请参考 `config.py` 中的注释说明。对于英法翻译数据,执行训练和英德翻译训练类似,修改命令中的词典和数据文件为英法数据相应文件的路径,另外要注意的是由于英法翻译数据 token 间不是使用空格进行分隔,需要修改 `token_delimiter` 参数的设置为 `--token_delimiter '\x01'`
训练时默认使用所有 GPU,可以通过 `CUDA_VISIBLE_DEVICES` 环境变量来设置使用的 GPU 数目。在训练过程中,每个 epoch 结束后将保存模型到参数 `model_dir` 指定的目录,每个 iteration 将打印如下的日志到标准输出: 训练时默认使用所有 GPU,可以通过 `CUDA_VISIBLE_DEVICES` 环境变量来设置使用的 GPU 数目。也可以只使用 CPU 训练(通过参数 `--device CPU` 设置),训练速度相对较慢。在训练过程中,每个 epoch 结束后将保存模型到参数 `model_dir` 指定的目录,每个 epoch 内也会每隔1000个 iteration 进行一次保存,每个 iteration 将打印如下的日志到标准输出:
```txt ```txt
epoch: 0, batch: 0, sum loss: 258793.343750, avg loss: 11.069005, ppl: 64151.644531 epoch: 0, batch: 0, sum loss: 258793.343750, avg loss: 11.069005, ppl: 64151.644531
epoch: 0, batch: 1, sum loss: 256140.718750, avg loss: 11.059616, ppl: 63552.148438 epoch: 0, batch: 1, sum loss: 256140.718750, avg loss: 11.059616, ppl: 63552.148438
...@@ -126,38 +137,120 @@ epoch: 0, batch: 9, sum loss: 245157.500000, avg loss: 10.966562, ppl: 57905.187 ...@@ -126,38 +137,120 @@ epoch: 0, batch: 9, sum loss: 245157.500000, avg loss: 10.966562, ppl: 57905.187
### 模型预测 ### 模型预测
`infer.py` 是模型预测脚本,模型训练完成后可以执行以下命令对指定文件中的文本进行翻译: `infer.py` 是模型预测脚本。以英德翻译数据为例,模型训练完成后可以执行以下命令对指定文件中的文本进行翻译:
```sh ```sh
python -u infer.py \ python -u infer.py \
--src_vocab_fpath data/vocab.bpe.32000 \ --src_vocab_fpath data/vocab.bpe.32000 \
--trg_vocab_fpath data/vocab.bpe.32000 \ --trg_vocab_fpath data/vocab.bpe.32000 \
--special_token '<s>' '<e>' '<unk>' \ --special_token '<s>' '<e>' '<unk>' \
--test_file_pattern data/newstest2013.tok.bpe.32000.en-de \ --test_file_pattern data/newstest2013.tok.bpe.32000.en-de \
--use_wordpiece False \
--token_delimiter ' ' \
--batch_size 4 \ --batch_size 4 \
model_path trained_models/pass_20.infer.model \ model_path trained_models/pass_20.infer.model \
beam_size 5 beam_size 5 \
max_out_len 256 max_out_len 256
``` ```
和模型训练时类似,预测时也需要设置数据和 reader 相关的参数,并可以执行 `python infer.py --help` 查看这些参数的说明(部分参数意义和训练时略有不同);同样可以在预测命令中设置模型超参数,但应与模型训练时的设置一致;此外相比于模型训练,预测时还有一些额外的参数,如需要设置 `model_path` 来给出模型所在目录,可以设置 `beam_size``max_out_len` 来指定 Beam Search 算法的搜索宽度和最大深度(翻译长度),这些参数也可以在 `config.py` 中的 `InferTaskConfig` 内查阅注释说明并进行更改设置。 和模型训练时类似,预测时也需要设置数据和 reader 相关的参数,并可以执行 `python infer.py --help` 查看这些参数的说明(部分参数意义和训练时略有不同);同样可以在预测命令中设置模型超参数,但应与模型训练时的设置一致;此外相比于模型训练,预测时还有一些额外的参数,如需要设置 `model_path` 来给出模型所在目录,可以设置 `beam_size``max_out_len` 来指定 Beam Search 算法的搜索宽度和最大深度(翻译长度),这些参数也可以在 `config.py` 中的 `InferTaskConfig` 内查阅注释说明并进行更改设置。
执行以上预测命令会打印翻译结果到标准输出,每行输出是对应行输入的得分最高的翻译。需要注意,对于使用 BPE 的数据,预测出的翻译结果也将是 BPE 表示的数据,要恢复成原始的数据(这里指 tokenize 后的数据)才能进行正确的评估,可以使用以下命令来恢复 `predict.txt` 内的翻译结果到 `predict.tok.txt` 中。 执行以上预测命令会打印翻译结果到标准输出,每行输出是对应行输入的得分最高的翻译。对于使用 BPE 的英德数据,预测出的翻译结果也将是 BPE 表示的数据,要还原成原始的数据(这里指 tokenize 后的数据)才能进行正确的评估,可以使用以下命令来恢复 `predict.txt` 内的翻译结果到 `predict.tok.txt` 中(无需再次 tokenize 处理):
```sh ```sh
sed 's/@@ //g' predict.txt > predict.tok.txt sed 's/@@ //g' predict.txt > predict.tok.txt
``` ```
接下来就可以使用参考翻译(这里使用的是 `newstest2013.tok.de`)对翻译结果进行 BLEU 指标的评估了。计算 BLEU 值的一个较为广泛使用的脚本可以从[这里](https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/generic/multi-bleu.perl)获取,获取后执行如下命令: 对于英法翻译的 wordpiece 数据,执行预测和英德翻译预测类似,修改命令中的词典和数据文件为英法数据相应文件的路径,另外需要注意修改 `token_delimiter` 参数的设置为 `--token_delimiter '\x01'`;同时要修改 `use_wordpiece` 参数的设置为 `--use_wordpiece True`,这会在预测时将翻译得到的 wordpiece 数据还原为原始数据输出。为了使用 tokenize 的数据进行评估,还需要对翻译结果进行 tokenize 的处理,[Moses](https://github.com/moses-smt/mosesdecoder) 提供了一系列机器翻译相关的脚本。执行 `git clone https://github.com/moses-smt/mosesdecoder.git` 克隆 mosesdecoder 仓库后,可以使用其中的 `tokenizer.perl` 脚本对 `predict.txt` 内的翻译结果进行 tokenize 处理并输出到 `predict.tok.txt` 中,如下:
```sh
perl mosesdecoder/scripts/tokenizer/tokenizer.perl -l fr < predict.txt > predict.tok.txt
```
接下来就可以使用参考翻译对翻译结果进行 BLEU 指标的评估了。计算 BLEU 值的脚本也在 Moses 中包含,以英德翻译 `newstest2013.tok.de` 数据为例,执行如下命令:
```sh ```sh
perl multi-bleu.perl data/newstest2013.tok.de < predict.tok.txt perl mosesdecoder/scripts/generic/multi-bleu.perl data/newstest2013.tok.de < predict.tok.txt
``` ```
可以看到类似如下的结果。 可以看到类似如下的结果。
``` ```
BLEU = 25.08, 58.3/31.5/19.6/12.6 (BP=0.966, ratio=0.967, hyp_len=61321, ref_len=63412) BLEU = 25.08, 58.3/31.5/19.6/12.6 (BP=0.966, ratio=0.967, hyp_len=61321, ref_len=63412)
``` ```
目前在未使用 model average 的情况下,使用默认配置单机八卡(同论文中 base model 的配置)进行训练,英德翻译在 `newstest2013` 上测试 BLEU 值为25.,在 `newstest2014` 上测试 BLEU 值为26.;英法翻译在 `newstest2014` 上测试 BLEU 值为36.。
### 参考文献 ### 分布式训练
Transformer 模型支持同步或者异步的分布式训练。分布式的配置主要包括两个方面:
1 命令行配置
- `--local`,有两个取值,`True`表示单机训练,而`False`表示使用分布式训练。默认为单机训练模式。
- `--sync`,有两个取值,但只有当`--local`参数为False才会产生影响,其中`True`表示同步训练模式,`False`表示异步训练模式。默认为同步训练模式。
2 环境变量配置
在分布式训练模式下,会手动配置训练的trainer数量和pserver数量。在网络拓扑上,每一个trainer都会和每一个pserver相连,pserver作为服务端,而trainer作为客户端。下面分pserver和trainer说明具体的参数配置:
1) pserver配置
- `PADDLE_IS_LOCAL=[0|1]` 是否是分布式训练,`0`标识是分布式,`1`标识是单机
- `TRAINING_ROLE=PSERVER` 标识当前节点是pserver
- `POD_IP=ip` 设置当前pserver使用对外服务的地址
- `PADDLE_PORT=port` 设置当前pserver对外服务监听端口号,和`POD_IP`共同构成对外的唯一标识
- `PADDLE_TRAINERS_NUM=num` 设置pserver连接的trainer的数量
下面是配置的示例, 使用两个pserver, 192.168.2.2上的配置如下:
```
export PADDLE_PSERVERS=192.168.2.2,192.168.2.3
export POD_IP=192.168.2.2
export PADDLE_TRAINERS_NUM=2
export TRAINING_ROLE=PSERVER
export PADDLE_IS_LOCAL=0
export PADDLE_PORT=6177
```
192.168.2.3上的配置如下:
```
export PADDLE_PSERVERS=192.168.2.2,192.168.2.3
export POD_IP=192.168.2.3
export PADDLE_TRAINERS_NUM=2
export TRAINING_ROLE=PSERVER
export PADDLE_IS_LOCAL=0
export PADDLE_PORT=6177
```
2) trainer配置
- `PADDLE_IS_LOCAL=[0|1]` 是否是分布式训练,`0`标识是分布式,`1`标识是单机
- `TRAINING_ROLE=TRAINER` 标识当前节点是trainer
- `PADDLE_PSERVERS=[ip1,ip2,……]` 设置pserver的ip地址,用于告知trainer互联的pserver的ip, 使用`,`分割
- `PADDLE_TRAINER_ID=num` 设置当前节点的编号, 编号的取值范围为0到N-1的整数
- `PADDLE_PORT=port` 设置请求的pserver服务端口号
下面是配置的示例, 使用两个trainer, trainer 1上的配置如下:
```
export TRAINING_ROLE=TRAINER
export PADDLE_PSERVERS=192.168.2.2,192.168.2.3
export PADDLE_TRAINERS_NUM=2
export PADDLE_TRAINER_ID=0
export PADDLE_IS_LOCAL=0
export PADDLE_PORT=6177
```
trainer 2上的配置如下:
```
export TRAINING_ROLE=TRAINER
export PADDLE_PSERVERS=192.168.2.2,192.168.2.3
export PADDLE_TRAINERS_NUM=2
export PADDLE_TRAINER_ID=1
export PADDLE_IS_LOCAL=0
export PADDLE_PORT=6177
```
### 参考文献
1. Vaswani A, Shazeer N, Parmar N, et al. [Attention is all you need](http://papers.nips.cc/paper/7181-attention-is-all-you-need.pdf)[C]//Advances in Neural Information Processing Systems. 2017: 6000-6010. 1. Vaswani A, Shazeer N, Parmar N, et al. [Attention is all you need](http://papers.nips.cc/paper/7181-attention-is-all-you-need.pdf)[C]//Advances in Neural Information Processing Systems. 2017: 6000-6010.
2. He K, Zhang X, Ren S, et al. [Deep residual learning for image recognition](http://openaccess.thecvf.com/content_cvpr_2016/papers/He_Deep_Residual_Learning_CVPR_2016_paper.pdf)[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2016: 770-778. 2. He K, Zhang X, Ren S, et al. [Deep residual learning for image recognition](http://openaccess.thecvf.com/content_cvpr_2016/papers/He_Deep_Residual_Learning_CVPR_2016_paper.pdf)[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2016: 770-778.
3. Ba J L, Kiros J R, Hinton G E. [Layer normalization](https://arxiv.org/pdf/1607.06450.pdf)[J]. arXiv preprint arXiv:1607.06450, 2016. 3. Ba J L, Kiros J R, Hinton G E. [Layer normalization](https://arxiv.org/pdf/1607.06450.pdf)[J]. arXiv preprint arXiv:1607.06450, 2016.
4. Sennrich R, Haddow B, Birch A. [Neural machine translation of rare words with subword units](https://arxiv.org/pdf/1508.07909)[J]. arXiv preprint arXiv:1508.07909, 2015. 4. Sennrich R, Haddow B, Birch A. [Neural machine translation of rare words with subword units](https://arxiv.org/pdf/1508.07909)[J]. arXiv preprint arXiv:1508.07909, 2015.
5. Wu Y, Schuster M, Chen Z, et al. [Google's neural machine translation system: Bridging the gap between human and machine translation](https://arxiv.org/pdf/1609.08144.pdf)[J]. arXiv preprint arXiv:1609.08144, 2016.
import argparse import argparse
import ast
import numpy as np import numpy as np
from functools import partial
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -11,6 +13,7 @@ from model import fast_decode as fast_decoder ...@@ -11,6 +13,7 @@ from model import fast_decode as fast_decoder
from config import * from config import *
from train import pad_batch_data from train import pad_batch_data
import reader import reader
import util
def parse_args(): def parse_args():
...@@ -46,6 +49,22 @@ def parse_args(): ...@@ -46,6 +49,22 @@ def parse_args():
default=["<s>", "<e>", "<unk>"], default=["<s>", "<e>", "<unk>"],
nargs=3, nargs=3,
help="The <bos>, <eos> and <unk> tokens in the dictionary.") help="The <bos>, <eos> and <unk> tokens in the dictionary.")
parser.add_argument(
"--use_wordpiece",
type=ast.literal_eval,
default=False,
help="The flag indicating if the data in wordpiece. The EN-FR data "
"we provided is wordpiece data. For wordpiece data, converting ids to "
"original words is a little different and some special codes are "
"provided in util.py to do this.")
parser.add_argument(
"--token_delimiter",
type=partial(
str.decode, encoding="string-escape"),
default=" ",
help="The delimiter used to split tokens in source or target sentences. "
"For EN-DE BPE data we provided, use spaces as token delimiter.; "
"For EN-FR wordpiece data we provided, use '\x01' as token delimiter.")
parser.add_argument( parser.add_argument(
'opts', 'opts',
help='See config.py for all options', help='See config.py for all options',
...@@ -320,7 +339,7 @@ def post_process_seq(seq, ...@@ -320,7 +339,7 @@ def post_process_seq(seq,
seq) seq)
def py_infer(test_data, trg_idx2word): def py_infer(test_data, trg_idx2word, use_wordpiece):
""" """
Inference by beam search implented by python, while the calculations from Inference by beam search implented by python, while the calculations from
symbols to probilities execute by Fluid operators. symbols to probilities execute by Fluid operators.
...@@ -399,7 +418,10 @@ def py_infer(test_data, trg_idx2word): ...@@ -399,7 +418,10 @@ def py_infer(test_data, trg_idx2word):
seqs = map(post_process_seq, batch_seqs[i]) seqs = map(post_process_seq, batch_seqs[i])
scores = batch_scores[i] scores = batch_scores[i]
for seq in seqs: for seq in seqs:
print(" ".join([trg_idx2word[idx] for idx in seq])) if use_wordpiece:
print(util.subword_ids_to_str(seq, trg_idx2word))
else:
print(" ".join([trg_idx2word[idx] for idx in seq]))
def prepare_batch_input(insts, data_input_names, util_input_names, src_pad_idx, def prepare_batch_input(insts, data_input_names, util_input_names, src_pad_idx,
...@@ -465,7 +487,7 @@ def prepare_batch_input(insts, data_input_names, util_input_names, src_pad_idx, ...@@ -465,7 +487,7 @@ def prepare_batch_input(insts, data_input_names, util_input_names, src_pad_idx,
return input_dict return input_dict
def fast_infer(test_data, trg_idx2word): def fast_infer(test_data, trg_idx2word, use_wordpiece):
""" """
Inference by beam search decoder based solely on Fluid operators. Inference by beam search decoder based solely on Fluid operators.
""" """
...@@ -520,7 +542,9 @@ def fast_infer(test_data, trg_idx2word): ...@@ -520,7 +542,9 @@ def fast_infer(test_data, trg_idx2word):
trg_idx2word[idx] trg_idx2word[idx]
for idx in post_process_seq( for idx in post_process_seq(
np.array(seq_ids)[sub_start:sub_end]) np.array(seq_ids)[sub_start:sub_end])
])) ]) if not use_wordpiece else util.subtoken_ids_to_str(
post_process_seq(np.array(seq_ids)[sub_start:sub_end]),
trg_idx2word))
scores[i].append(np.array(seq_scores)[sub_end - 1]) scores[i].append(np.array(seq_scores)[sub_end - 1])
print hyps[i][-1] print hyps[i][-1]
if len(hyps[i]) >= InferTaskConfig.n_best: if len(hyps[i]) >= InferTaskConfig.n_best:
...@@ -534,8 +558,9 @@ def infer(args, inferencer=fast_infer): ...@@ -534,8 +558,9 @@ def infer(args, inferencer=fast_infer):
src_vocab_fpath=args.src_vocab_fpath, src_vocab_fpath=args.src_vocab_fpath,
trg_vocab_fpath=args.trg_vocab_fpath, trg_vocab_fpath=args.trg_vocab_fpath,
fpattern=args.test_file_pattern, fpattern=args.test_file_pattern,
batch_size=args.batch_size, token_delimiter=args.token_delimiter,
use_token_batch=False, use_token_batch=False,
batch_size=args.batch_size,
pool_size=args.pool_size, pool_size=args.pool_size,
sort_type=reader.SortType.NONE, sort_type=reader.SortType.NONE,
shuffle=False, shuffle=False,
...@@ -548,7 +573,7 @@ def infer(args, inferencer=fast_infer): ...@@ -548,7 +573,7 @@ def infer(args, inferencer=fast_infer):
clip_last_batch=False) clip_last_batch=False)
trg_idx2word = test_data.load_dict( trg_idx2word = test_data.load_dict(
dict_path=args.trg_vocab_fpath, reverse=True) dict_path=args.trg_vocab_fpath, reverse=True)
inferencer(test_data, trg_idx2word) inferencer(test_data, trg_idx2word, args.use_wordpiece)
if __name__ == "__main__": if __name__ == "__main__":
......
import os
import time
import argparse
import ast
import numpy as np
import multiprocessing
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
from train import split_data, read_multiple, prepare_batch_input
from model import transformer, position_encoding_init
from optim import LearningRateScheduler
from config import *
import reader
def parse_args():
    """
    Parse the command line of the profiling script and merge the
    dictionary-derived settings into the task/model configs.

    After parsing, both vocabulary files are loaded and their sizes, together
    with the indices of the <bos>, <eos> and <unk> tokens (looked up in the
    source dictionary), are appended to the trailing ``opts`` tokens and
    handed to ``merge_cfg_from_list`` for ``TrainTaskConfig`` and
    ``ModelHyperParams``.

    :return: The parsed command line arguments.
    :rtype: argparse.Namespace
    """
    # The sentence must be passed as `description`: the first positional
    # parameter of ArgumentParser is `prog`, which would make the whole
    # sentence show up as the program name in usage and error messages.
    parser = argparse.ArgumentParser(
        description="Profile the training process for Transformer.")
    parser.add_argument(
        "--src_vocab_fpath",
        type=str,
        required=True,
        help="The path of vocabulary file of source language.")
    parser.add_argument(
        "--trg_vocab_fpath",
        type=str,
        required=True,
        help="The path of vocabulary file of target language.")
    parser.add_argument(
        "--train_file_pattern",
        type=str,
        required=True,
        help="The pattern to match training data files.")
    parser.add_argument(
        "--use_token_batch",
        type=ast.literal_eval,
        default=True,
        help="The flag indicating whether to "
        "produce batch data according to token number.")
    parser.add_argument(
        "--batch_size",
        type=int,
        default=2048,
        help="The number of sequences contained in a mini-batch, or the maximum "
        "number of tokens (include paddings) contained in a mini-batch. Note "
        "that this represents the number on single device and the actual batch "
        "size for multi-devices will multiply the device number.")
    parser.add_argument(
        "--num_iters",
        type=int,
        default=10,
        help="The maximum number of iterations profiling over.")
    parser.add_argument(
        "--pool_size",
        type=int,
        default=10000,
        help="The buffer size to pool data.")
    parser.add_argument(
        "--special_token",
        type=str,
        default=["<s>", "<e>", "<unk>"],
        nargs=3,
        help="The <bos>, <eos> and <unk> tokens in the dictionary.")
    parser.add_argument(
        'opts',
        help='See config.py for all options',
        default=None,
        nargs=argparse.REMAINDER)
    parser.add_argument(
        '--device',
        type=str,
        default='GPU',
        choices=['CPU', 'GPU'],
        help="The device type.")

    args = parser.parse_args()

    # Append args related to dict
    src_dict = reader.DataReader.load_dict(args.src_vocab_fpath)
    trg_dict = reader.DataReader.load_dict(args.trg_vocab_fpath)
    bos_token, eos_token, unk_token = args.special_token
    # Flat "key, value, key, value, ..." list, the same layout as `opts`.
    # All three special-token indices are taken from the source dictionary.
    dict_args = [
        "src_vocab_size", str(len(src_dict)),
        "trg_vocab_size", str(len(trg_dict)),
        "bos_idx", str(src_dict[bos_token]),
        "eos_idx", str(src_dict[eos_token]),
        "unk_idx", str(src_dict[unk_token]),
    ]
    merge_cfg_from_list(args.opts + dict_args,
                        [TrainTaskConfig, ModelHyperParams])
    return args
def train_loop(exe, train_progm, init, num_iters, train_data, dev_count,
               sum_cost, avg_cost, lr_scheduler, token_num, predict):
    """
    Run at most `num_iters` training batches and measure the time spent.

    :param exe: The executor used to run a batch. It is fed a list of dicts
        (one per device) when `dev_count` > 1, otherwise a single dict.
    :param train_progm: Unused here; kept for interface compatibility.
    :param init: Whether the position encoding tables have already been fed.
        When False they are added to the feed of the first batch.
    :type init: bool
    :param num_iters: The maximum number of batches to run.
    :type num_iters: int
    :param train_data: A callable returning an iterable of batches.
    :param dev_count: The number of devices each batch is split across.
    :type dev_count: int
    :param sum_cost: The variable holding the summed loss.
    :param avg_cost: Unused here; kept for interface compatibility.
    :param lr_scheduler: The scheduler providing the learning rate to feed.
    :param token_num: The variable holding the number of tokens.
    :param predict: Unused here; kept for interface compatibility.
    :return: A tuple (total elapsed seconds, seconds spent inside exe.run).
    :rtype: tuple
    """
    data_input_names = (encoder_data_input_fields +
                        decoder_data_input_fields[:-1] +
                        label_data_input_fields)
    util_input_names = encoder_util_input_fields + decoder_util_input_fields

    start_time = time.time()
    exec_time = 0.0
    for batch_id, data in enumerate(train_data()):
        if batch_id >= num_iters:
            break
        feed_list = []
        total_num_token = 0
        # Advance the scheduler exactly once per batch so that every device
        # is fed the same learning rate. Updating it inside the per-device
        # loop would step the schedule `dev_count` times per batch.
        lr_rate = lr_scheduler.update_learning_rate()
        for place_id, data_buffer in enumerate(
                split_data(
                    data, num_part=dev_count)):
            data_input_dict, util_input_dict, num_token = prepare_batch_input(
                data_buffer, data_input_names, util_input_names,
                ModelHyperParams.eos_idx, ModelHyperParams.eos_idx,
                ModelHyperParams.n_head, ModelHyperParams.d_model)
            total_num_token += num_token
            # list() keeps the concatenation valid whether items() returns a
            # list or a view.
            feed_dict = dict(
                list(data_input_dict.items()) + list(util_input_dict.items()))
            feed_dict[lr_scheduler.learning_rate.name] = lr_rate
            feed_list.append(feed_dict)

            if not init:
                # Only the first batch carries the position encoding tables.
                for pos_enc_param_name in pos_enc_param_names:
                    pos_enc = position_encoding_init(
                        ModelHyperParams.max_length + 1,
                        ModelHyperParams.d_model)
                    feed_list[place_id][pos_enc_param_name] = pos_enc
        # NOTE(review): presumably this is the customized gradient scale that
        # normalizes the summed loss by the token count of the whole batch;
        # confirm against the build strategy used by the caller.
        for feed_dict in feed_list:
            feed_dict[sum_cost.name + "@GRAD"] = 1. / total_num_token

        exe_start_time = time.time()
        if dev_count > 1:
            # parallel executor
            outs = exe.run(fetch_list=[sum_cost.name, token_num.name],
                           feed=feed_list)
        else:
            # executor
            outs = exe.run(fetch_list=[sum_cost, token_num], feed=feed_list[0])
        exec_time += time.time() - exe_start_time

        sum_cost_val, token_num_val = np.array(outs[0]), np.array(outs[1])
        total_sum_cost = sum_cost_val.sum()  # sum the cost from multi-devices
        total_token_num = token_num_val.sum()
        total_avg_cost = total_sum_cost / total_token_num
        print("batch: %d, sum loss: %f, avg loss: %f, ppl: %f" %
              (batch_id, total_sum_cost, total_avg_cost,
               np.exp([min(total_avg_cost, 100)])))
        init = True
    return time.time() - start_time, exec_time
def profile(args):
    """
    Build the Transformer training network and time a few training batches.

    Three warm-up batches are run first (they also feed the position encoding
    tables), then `args.num_iters` batches are timed. With a single device the
    timed run is wrapped in the Fluid profiler, which writes its report to
    '/tmp/profile_file'.

    :param args: The arguments returned by `parse_args`.
    :type args: argparse.Namespace
    """
    print(args)

    if args.device == 'CPU':
        TrainTaskConfig.use_gpu = False

    if not TrainTaskConfig.use_gpu:
        place = fluid.CPUPlace()
        # Honor CPU_NUM the same way train.py does, so the number of feed
        # dicts built per batch matches the CPU device count in use.
        # NOTE(review): ParallelExecutor is expected to size its CPU places
        # from CPU_NUM as well -- confirm for the Paddle version in use.
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    else:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()

    exe = fluid.Executor(place)

    sum_cost, avg_cost, predict, token_num = transformer(
        ModelHyperParams.src_vocab_size, ModelHyperParams.trg_vocab_size,
        ModelHyperParams.max_length + 1, ModelHyperParams.n_layer,
        ModelHyperParams.n_head, ModelHyperParams.d_key,
        ModelHyperParams.d_value, ModelHyperParams.d_model,
        ModelHyperParams.d_inner_hid, ModelHyperParams.dropout,
        ModelHyperParams.weight_sharing, TrainTaskConfig.label_smooth_eps)
    lr_scheduler = LearningRateScheduler(ModelHyperParams.d_model,
                                         TrainTaskConfig.warmup_steps,
                                         TrainTaskConfig.learning_rate)

    optimizer = fluid.optimizer.Adam(
        learning_rate=lr_scheduler.learning_rate,
        beta1=TrainTaskConfig.beta1,
        beta2=TrainTaskConfig.beta2,
        epsilon=TrainTaskConfig.eps)
    optimizer.minimize(sum_cost)

    # Initialize the parameters.
    if TrainTaskConfig.ckpt_path:
        fluid.io.load_persistables(exe, TrainTaskConfig.ckpt_path)
        lr_scheduler.current_steps = TrainTaskConfig.start_step
    else:
        exe.run(fluid.framework.default_startup_program())

    # Disable all sorts for they will be done in the 1st batch.
    train_data = reader.DataReader(
        src_vocab_fpath=args.src_vocab_fpath,
        trg_vocab_fpath=args.trg_vocab_fpath,
        fpattern=args.train_file_pattern,
        use_token_batch=args.use_token_batch,
        batch_size=args.batch_size * (1 if args.use_token_batch else dev_count),
        pool_size=args.pool_size,
        sort_type='none',
        shuffle=False,
        shuffle_batch=False,
        start_mark=args.special_token[0],
        end_mark=args.special_token[1],
        unk_mark=args.special_token[2],
        # count start and end tokens out
        max_length=ModelHyperParams.max_length - 2,
        clip_last_batch=False)
    train_data = read_multiple(
        reader=train_data.batch_generator,
        count=dev_count if args.use_token_batch else 1)

    if dev_count > 1:
        build_strategy = fluid.BuildStrategy()
        build_strategy.gradient_scale_strategy = \
            fluid.BuildStrategy.GradientScaleStrategy.Customized
        train_exe = fluid.ParallelExecutor(
            use_cuda=TrainTaskConfig.use_gpu,
            loss_name=sum_cost.name,
            main_program=fluid.default_main_program(),
            build_strategy=build_strategy)

    print("Warming up ...")
    # `init=False` makes the warm-up feed the position encoding tables.
    train_loop(exe if dev_count == 1 else train_exe,
               fluid.default_main_program(), False, 3, train_data, dev_count,
               sum_cost, avg_cost, lr_scheduler, token_num, predict)

    print("\nProfiling ...")
    if dev_count == 1:
        # The operator-level profiler is only used in the single-device case.
        with profiler.profiler('All', 'total', '/tmp/profile_file'):
            total_time, exec_time = train_loop(
                exe,
                fluid.default_main_program(), True, args.num_iters, train_data,
                dev_count, sum_cost, avg_cost, lr_scheduler, token_num, predict)
    else:
        total_time, exec_time = train_loop(
            train_exe,
            fluid.default_main_program(), True, args.num_iters, train_data,
            dev_count, sum_cost, avg_cost, lr_scheduler, token_num, predict)
    print("Elapsed time: total %f s, in executor %f s" %
          (total_time, exec_time))
# Script entry point: parse the command line (which also merges the
# dictionary-derived settings into the configs), then run the profiling.
if __name__ == "__main__":
    args = parse_args()
    profile(args)
...@@ -116,9 +116,12 @@ class DataReader(object): ...@@ -116,9 +116,12 @@ class DataReader(object):
:param use_token_batch: Whether to produce batch data according to :param use_token_batch: Whether to produce batch data according to
token number. token number.
:type use_token_batch: bool :type use_token_batch: bool
:param delimiter: The delimiter used to split source and target in each :param field_delimiter: The delimiter used to split source and target in
line of data file. each line of data file.
:type delimiter: basestring :type field_delimiter: basestring
:param token_delimiter: The delimiter used to split tokens in source or
target sentences.
:type token_delimiter: basestring
:param start_mark: The token representing for the beginning of :param start_mark: The token representing for the beginning of
sentences in dictionary. sentences in dictionary.
:type start_mark: basestring :type start_mark: basestring
...@@ -145,7 +148,8 @@ class DataReader(object): ...@@ -145,7 +148,8 @@ class DataReader(object):
shuffle=True, shuffle=True,
shuffle_batch=False, shuffle_batch=False,
use_token_batch=False, use_token_batch=False,
delimiter="\t", field_delimiter="\t",
token_delimiter=" ",
start_mark="<s>", start_mark="<s>",
end_mark="<e>", end_mark="<e>",
unk_mark="<unk>", unk_mark="<unk>",
...@@ -164,7 +168,8 @@ class DataReader(object): ...@@ -164,7 +168,8 @@ class DataReader(object):
self._shuffle_batch = shuffle_batch self._shuffle_batch = shuffle_batch
self._min_length = min_length self._min_length = min_length
self._max_length = max_length self._max_length = max_length
self._delimiter = delimiter self._field_delimiter = field_delimiter
self._token_delimiter = token_delimiter
self._epoch_batches = [] self._epoch_batches = []
src_seq_words, trg_seq_words = self._load_data(fpattern, tar_fname) src_seq_words, trg_seq_words = self._load_data(fpattern, tar_fname)
...@@ -196,7 +201,7 @@ class DataReader(object): ...@@ -196,7 +201,7 @@ class DataReader(object):
trg_seq_words = [] trg_seq_words = []
for line in f_obj: for line in f_obj:
fields = line.strip().split(self._delimiter) fields = line.strip().split(self._field_delimiter)
if (not self._only_src and len(fields) != 2) or (self._only_src and if (not self._only_src and len(fields) != 2) or (self._only_src and
len(fields) != 1): len(fields) != 1):
...@@ -207,7 +212,7 @@ class DataReader(object): ...@@ -207,7 +212,7 @@ class DataReader(object):
max_len = -1 max_len = -1
for i, seq in enumerate(fields): for i, seq in enumerate(fields):
seq_words = seq.split() seq_words = seq.split(self._token_delimiter)
max_len = max(max_len, len(seq_words)) max_len = max(max_len, len(seq_words))
if len(seq_words) == 0 or \ if len(seq_words) == 0 or \
len(seq_words) < self._min_length or \ len(seq_words) < self._min_length or \
...@@ -258,9 +263,9 @@ class DataReader(object): ...@@ -258,9 +263,9 @@ class DataReader(object):
with open(dict_path, "r") as fdict: with open(dict_path, "r") as fdict:
for idx, line in enumerate(fdict): for idx, line in enumerate(fdict):
if reverse: if reverse:
word_dict[idx] = line.strip() word_dict[idx] = line.strip('\n')
else: else:
word_dict[line.strip()] = idx word_dict[line.strip('\n')] = idx
return word_dict return word_dict
def _sample_generator(self): def _sample_generator(self):
......
...@@ -3,6 +3,8 @@ import time ...@@ -3,6 +3,8 @@ import time
import argparse import argparse
import ast import ast
import numpy as np import numpy as np
import multiprocessing
from functools import partial
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -75,11 +77,33 @@ def parse_args(): ...@@ -75,11 +77,33 @@ def parse_args():
default=["<s>", "<e>", "<unk>"], default=["<s>", "<e>", "<unk>"],
nargs=3, nargs=3,
help="The <bos>, <eos> and <unk> tokens in the dictionary.") help="The <bos>, <eos> and <unk> tokens in the dictionary.")
parser.add_argument(
"--token_delimiter",
type=partial(
str.decode, encoding="string-escape"),
default=" ",
help="The delimiter used to split tokens in source or target sentences. "
"For EN-DE BPE data we provided, use spaces as token delimiter. "
"For EN-FR wordpiece data we provided, use '\x01' as token delimiter.")
parser.add_argument( parser.add_argument(
'opts', 'opts',
help='See config.py for all options', help='See config.py for all options',
default=None, default=None,
nargs=argparse.REMAINDER) nargs=argparse.REMAINDER)
parser.add_argument(
'--local',
type=ast.literal_eval,
default=True,
help='Whether to run as local mode.')
parser.add_argument(
'--device',
type=str,
default='GPU',
choices=['CPU', 'GPU'],
help="The device type.")
parser.add_argument(
'--sync', type=ast.literal_eval, default=True, help="sync mode.")
args = parser.parse_args() args = parser.parse_args()
# Append args related to dict # Append args related to dict
src_dict = reader.DataReader.load_dict(args.src_vocab_fpath) src_dict = reader.DataReader.load_dict(args.src_vocab_fpath)
...@@ -247,40 +271,81 @@ def split_data(data, num_part): ...@@ -247,40 +271,81 @@ def split_data(data, num_part):
] ]
def train(args): def test_context(train_progm, avg_cost, train_exe, dev_count, data_input_names,
dev_count = fluid.core.get_cuda_device_count() util_input_names, sum_cost, token_num):
# Context to do validation.
test_program = train_progm.clone()
with fluid.program_guard(test_program):
test_program = fluid.io.get_inference_program([avg_cost])
sum_cost, avg_cost, predict, token_num = transformer( val_data = reader.DataReader(
ModelHyperParams.src_vocab_size, ModelHyperParams.trg_vocab_size, src_vocab_fpath=args.src_vocab_fpath,
ModelHyperParams.max_length + 1, ModelHyperParams.n_layer, trg_vocab_fpath=args.trg_vocab_fpath,
ModelHyperParams.n_head, ModelHyperParams.d_key, fpattern=args.val_file_pattern,
ModelHyperParams.d_value, ModelHyperParams.d_model, token_delimiter=args.token_delimiter,
ModelHyperParams.d_inner_hid, ModelHyperParams.dropout, use_token_batch=args.use_token_batch,
ModelHyperParams.weight_sharing, TrainTaskConfig.label_smooth_eps) batch_size=args.batch_size * (1 if args.use_token_batch else dev_count),
pool_size=args.pool_size,
sort_type=args.sort_type,
start_mark=args.special_token[0],
end_mark=args.special_token[1],
unk_mark=args.special_token[2],
# count start and end tokens out
max_length=ModelHyperParams.max_length - 2,
clip_last_batch=False,
shuffle=False,
shuffle_batch=False)
lr_scheduler = LearningRateScheduler(ModelHyperParams.d_model, test_exe = fluid.ParallelExecutor(
TrainTaskConfig.warmup_steps, use_cuda=TrainTaskConfig.use_gpu,
TrainTaskConfig.learning_rate) main_program=test_program,
optimizer = fluid.optimizer.Adam( share_vars_from=train_exe)
learning_rate=lr_scheduler.learning_rate,
beta1=TrainTaskConfig.beta1, def test(exe=test_exe):
beta2=TrainTaskConfig.beta2, test_total_cost = 0
epsilon=TrainTaskConfig.eps) test_total_token = 0
optimizer.minimize(sum_cost) test_data = read_multiple(
reader=val_data.batch_generator,
place = fluid.CUDAPlace(0) if TrainTaskConfig.use_gpu else fluid.CPUPlace() count=dev_count if args.use_token_batch else 1)
exe = fluid.Executor(place) for batch_id, data in enumerate(test_data()):
feed_list = []
for place_id, data_buffer in enumerate(
split_data(
data, num_part=dev_count)):
data_input_dict, util_input_dict, _ = prepare_batch_input(
data_buffer, data_input_names, util_input_names,
ModelHyperParams.eos_idx, ModelHyperParams.eos_idx,
ModelHyperParams.n_head, ModelHyperParams.d_model)
feed_list.append(
dict(data_input_dict.items() + util_input_dict.items()))
outs = exe.run(feed=feed_list,
fetch_list=[sum_cost.name, token_num.name])
sum_cost_val, token_num_val = np.array(outs[0]), np.array(outs[1])
test_total_cost += sum_cost_val.sum()
test_total_token += token_num_val.sum()
test_avg_cost = test_total_cost / test_total_token
test_ppl = np.exp([min(test_avg_cost, 100)])
return test_avg_cost, test_ppl
return test
def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
token_num, predict):
# Initialize the parameters. # Initialize the parameters.
if TrainTaskConfig.ckpt_path: if TrainTaskConfig.ckpt_path:
fluid.io.load_persistables(exe, TrainTaskConfig.ckpt_path) fluid.io.load_persistables(exe, TrainTaskConfig.ckpt_path)
lr_scheduler.current_steps = TrainTaskConfig.start_step lr_scheduler.current_steps = TrainTaskConfig.start_step
else: else:
print "init fluid.framework.default_startup_program"
exe.run(fluid.framework.default_startup_program()) exe.run(fluid.framework.default_startup_program())
train_data = reader.DataReader( train_data = reader.DataReader(
src_vocab_fpath=args.src_vocab_fpath, src_vocab_fpath=args.src_vocab_fpath,
trg_vocab_fpath=args.trg_vocab_fpath, trg_vocab_fpath=args.trg_vocab_fpath,
fpattern=args.train_file_pattern, fpattern=args.train_file_pattern,
token_delimiter=args.token_delimiter,
use_token_batch=args.use_token_batch, use_token_batch=args.use_token_batch,
batch_size=args.batch_size * (1 if args.use_token_batch else dev_count), batch_size=args.batch_size * (1 if args.use_token_batch else dev_count),
pool_size=args.pool_size, pool_size=args.pool_size,
...@@ -305,77 +370,26 @@ def train(args): ...@@ -305,77 +370,26 @@ def train(args):
train_exe = fluid.ParallelExecutor( train_exe = fluid.ParallelExecutor(
use_cuda=TrainTaskConfig.use_gpu, use_cuda=TrainTaskConfig.use_gpu,
loss_name=sum_cost.name, loss_name=sum_cost.name,
main_program=train_progm,
build_strategy=build_strategy) build_strategy=build_strategy)
def test_context():
# Context to do validation.
test_program = fluid.default_main_program().clone(for_test=True)
test_exe = fluid.ParallelExecutor(
use_cuda=TrainTaskConfig.use_gpu,
main_program=test_program,
share_vars_from=train_exe)
val_data = reader.DataReader(
src_vocab_fpath=args.src_vocab_fpath,
trg_vocab_fpath=args.trg_vocab_fpath,
fpattern=args.val_file_pattern,
use_token_batch=args.use_token_batch,
batch_size=args.batch_size *
(1 if args.use_token_batch else dev_count),
pool_size=args.pool_size,
sort_type=args.sort_type,
start_mark=args.special_token[0],
end_mark=args.special_token[1],
unk_mark=args.special_token[2],
# count start and end tokens out
max_length=ModelHyperParams.max_length - 2,
clip_last_batch=False,
shuffle=False,
shuffle_batch=False)
def test(exe=test_exe):
test_total_cost = 0
test_total_token = 0
test_data = read_multiple(
reader=val_data.batch_generator,
count=dev_count if args.use_token_batch else 1)
for batch_id, data in enumerate(test_data()):
feed_list = []
for place_id, data_buffer in enumerate(
split_data(
data, num_part=dev_count)):
data_input_dict, util_input_dict, _ = prepare_batch_input(
data_buffer, data_input_names, util_input_names,
ModelHyperParams.eos_idx, ModelHyperParams.eos_idx,
ModelHyperParams.n_head, ModelHyperParams.d_model)
feed_list.append(
dict(data_input_dict.items() + util_input_dict.items()))
outs = exe.run(feed=feed_list,
fetch_list=[sum_cost.name, token_num.name])
sum_cost_val, token_num_val = np.array(outs[0]), np.array(outs[
1])
test_total_cost += sum_cost_val.sum()
test_total_token += token_num_val.sum()
test_avg_cost = test_total_cost / test_total_token
test_ppl = np.exp([min(test_avg_cost, 100)])
return test_avg_cost, test_ppl
return test
if args.val_file_pattern is not None:
test = test_context()
data_input_names = encoder_data_input_fields + decoder_data_input_fields[: data_input_names = encoder_data_input_fields + decoder_data_input_fields[:
-1] + label_data_input_fields -1] + label_data_input_fields
util_input_names = encoder_util_input_fields + decoder_util_input_fields util_input_names = encoder_util_input_fields + decoder_util_input_fields
if args.val_file_pattern is not None:
test = test_context(train_progm, avg_cost, train_exe, dev_count,
data_input_names, util_input_names, sum_cost,
token_num)
init = False init = False
for pass_id in xrange(TrainTaskConfig.pass_num): for pass_id in xrange(TrainTaskConfig.pass_num):
pass_start_time = time.time() pass_start_time = time.time()
for batch_id, data in enumerate(train_data()): for batch_id, data in enumerate(train_data()):
feed_list = [] feed_list = []
total_num_token = 0 total_num_token = 0
lr_rate = lr_scheduler.update_learning_rate() if args.local:
lr_rate = lr_scheduler.update_learning_rate()
for place_id, data_buffer in enumerate( for place_id, data_buffer in enumerate(
split_data( split_data(
data, num_part=dev_count)): data, num_part=dev_count)):
...@@ -384,11 +398,15 @@ def train(args): ...@@ -384,11 +398,15 @@ def train(args):
ModelHyperParams.eos_idx, ModelHyperParams.eos_idx, ModelHyperParams.eos_idx, ModelHyperParams.eos_idx,
ModelHyperParams.n_head, ModelHyperParams.d_model) ModelHyperParams.n_head, ModelHyperParams.d_model)
total_num_token += num_token total_num_token += num_token
feed_list.append( feed_kv_pairs = data_input_dict.items() + util_input_dict.items(
dict(data_input_dict.items() + util_input_dict.items() + )
{lr_scheduler.learning_rate.name: lr_rate}.items())) if args.local:
feed_kv_pairs += {
if not init: # init the position encoding table lr_scheduler.learning_rate.name: lr_rate
}.items()
feed_list.append(dict(feed_kv_pairs))
if not init:
for pos_enc_param_name in pos_enc_param_names: for pos_enc_param_name in pos_enc_param_names:
pos_enc = position_encoding_init( pos_enc = position_encoding_init(
ModelHyperParams.max_length + 1, ModelHyperParams.max_length + 1,
...@@ -406,12 +424,16 @@ def train(args): ...@@ -406,12 +424,16 @@ def train(args):
print("epoch: %d, batch: %d, sum loss: %f, avg loss: %f, ppl: %f" % print("epoch: %d, batch: %d, sum loss: %f, avg loss: %f, ppl: %f" %
(pass_id, batch_id, total_sum_cost, total_avg_cost, (pass_id, batch_id, total_sum_cost, total_avg_cost,
np.exp([min(total_avg_cost, 100)]))) np.exp([min(total_avg_cost, 100)])))
if batch_id > 0 and batch_id % 1000 == 0:
fluid.io.save_persistables(
exe,
os.path.join(TrainTaskConfig.ckpt_dir, "latest.checkpoint"))
init = True init = True
# Validate and save the model for inference. # Validate and save the model for inference.
print("epoch: %d, " % pass_id + ( print("epoch: %d, " % pass_id +
"val avg loss: %f, val ppl: %f, " % test() ("val avg loss: %f, val ppl: %f, " % test()
if args.val_file_pattern is not None else "") + "consumed %fs" % ( if args.val_file_pattern is not None else "") + "consumed %fs" %
time.time() - pass_start_time)) (time.time() - pass_start_time))
fluid.io.save_persistables( fluid.io.save_persistables(
exe, exe,
os.path.join(TrainTaskConfig.ckpt_dir, os.path.join(TrainTaskConfig.ckpt_dir,
...@@ -422,6 +444,107 @@ def train(args): ...@@ -422,6 +444,107 @@ def train(args):
data_input_names[:-2] + util_input_names, [predict], exe) data_input_names[:-2] + util_input_names, [predict], exe)
def _require_env(name):
    """
    Return the value of the environment variable `name`.

    :param name: The name of the environment variable.
    :type name: basestring
    :raises SystemExit: If the variable is unset or empty.
    """
    value = os.getenv(name)
    if not value:
        raise SystemExit("need env %s" % name)
    return value


def train(args):
    """
    Build the Transformer training network and run it locally or as one role
    of a parameter-server job.

    Environment variables read here:
    PADDLE_IS_LOCAL ('0' switches to distributed mode, overriding
    `args.local`), TRAINING_ROLE ('TRAINER' or 'PSERVER', default 'TRAINER'),
    CPU_NUM (CPU device count) and, in distributed mode, PADDLE_PORT
    (default '6174'), PADDLE_PSERVERS (comma separated ips),
    PADDLE_TRAINERS_NUM (default '0'), POD_IP and PADDLE_TRAINER_ID.

    :param args: The arguments returned by `parse_args`.
    :type args: argparse.Namespace
    :raises SystemExit: If a required environment variable is missing in
        distributed mode.
    """
    # priority: ENV > args > config
    is_local = os.getenv("PADDLE_IS_LOCAL", "1")
    if is_local == '0':
        args.local = False
    print(args)

    if args.device == 'CPU':
        TrainTaskConfig.use_gpu = False

    training_role = os.getenv("TRAINING_ROLE", "TRAINER")

    if training_role == "PSERVER" or (not TrainTaskConfig.use_gpu):
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    else:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()

    exe = fluid.Executor(place)

    sum_cost, avg_cost, predict, token_num = transformer(
        ModelHyperParams.src_vocab_size, ModelHyperParams.trg_vocab_size,
        ModelHyperParams.max_length + 1, ModelHyperParams.n_layer,
        ModelHyperParams.n_head, ModelHyperParams.d_key,
        ModelHyperParams.d_value, ModelHyperParams.d_model,
        ModelHyperParams.d_inner_hid, ModelHyperParams.dropout,
        ModelHyperParams.weight_sharing, TrainTaskConfig.label_smooth_eps)
    lr_scheduler = LearningRateScheduler(ModelHyperParams.d_model,
                                         TrainTaskConfig.warmup_steps,
                                         TrainTaskConfig.learning_rate)

    if args.local:
        # Local mode: the learning rate is fed by `lr_scheduler` each batch.
        optimizer = fluid.optimizer.Adam(
            learning_rate=lr_scheduler.learning_rate,
            beta1=TrainTaskConfig.beta1,
            beta2=TrainTaskConfig.beta2,
            epsilon=TrainTaskConfig.eps)
        optimizer.minimize(sum_cost)
    elif not args.sync:
        # Distributed async mode uses plain SGD with a fixed learning rate.
        optimizer = fluid.optimizer.SGD(0.003)
        optimizer.minimize(sum_cost)
    else:
        # Distributed sync mode computes the noam decay inside the program.
        lr_decay = fluid.layers\
            .learning_rate_scheduler\
            .noam_decay(ModelHyperParams.d_model,
                        TrainTaskConfig.warmup_steps)

        optimizer = fluid.optimizer.Adam(
            learning_rate=lr_decay,
            beta1=TrainTaskConfig.beta1,
            beta2=TrainTaskConfig.beta2,
            epsilon=TrainTaskConfig.eps)
        optimizer.minimize(sum_cost)

    if args.local:
        print("local start_up:")
        train_loop(exe,
                   fluid.default_main_program(), dev_count, sum_cost, avg_cost,
                   lr_scheduler, token_num, predict)
        return

    port = os.getenv("PADDLE_PORT", "6174")
    pserver_ips = _require_env("PADDLE_PSERVERS")  # ip,ip...
    pserver_endpoints = ",".join(
        ':'.join([ip, port])
        for ip in pserver_ips.split(","))  # ip:port,ip:port...
    trainers = int(os.getenv("PADDLE_TRAINERS_NUM", "0"))
    # Built once from the (defaulted) port, so the parameter server no longer
    # fails when PADDLE_PORT is unset.
    current_endpoint = _require_env("POD_IP") + ":" + port
    trainer_id = int(_require_env("PADDLE_TRAINER_ID"))
    t = fluid.DistributeTranspiler()
    t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)

    if training_role == "PSERVER":
        pserver_prog = t.get_pserver_program(current_endpoint)
        pserver_startup = t.get_startup_program(current_endpoint,
                                                pserver_prog)

        print("psserver begin run")
        # Dump the transpiled programs next to the script for inspection.
        with open('pserver_startup.desc', 'w') as f:
            f.write(str(pserver_startup))
        with open('pserver_prog.desc', 'w') as f:
            f.write(str(pserver_prog))
        exe.run(pserver_startup)
        exe.run(pserver_prog)
    elif training_role == "TRAINER":
        trainer_prog = t.get_trainer_program()
        with open('trainer_prog.desc', 'w') as f:
            f.write(str(trainer_prog))
        train_loop(exe, trainer_prog, dev_count, sum_cost, avg_cost,
                   lr_scheduler, token_num, predict)
    else:
        print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
if __name__ == "__main__": if __name__ == "__main__":
args = parse_args() args = parse_args()
train(args) train(args)
import sys
import re
import six
import unicodedata
# Pattern recognising the three escape sequences found in token strings, so
# that escaping can be inverted:
#   backslash + 'u'           stands for '_'
#   backslash + backslash     stands for a single backslash
#   backslash + digits + ';'  stands for unichr(<digits>)
_UNESCAPE_REGEX = re.compile(r"\\u|\\\\|\\([0-9]+);")

# Every character below sys.maxunicode whose Unicode general category starts
# with "L" (letters) or "N" (numbers).
_ALPHANUMERIC_CHAR_SET = set(
    char for char in (six.unichr(code) for code in range(sys.maxunicode))
    if unicodedata.category(char)[0] in ("L", "N"))
def unescape_token(escaped_token):
    """
    Turn an escaped token back into its original text.

    A single trailing "_" is removed first, then every escape sequence matched
    by `_UNESCAPE_REGEX` is replaced by the character it stands for.
    """

    def _replacement(found):
        # Group 1 is only set for the numeric form (backslash, digits, ';').
        code_point = found.group(1)
        if code_point is not None:
            try:
                return six.unichr(int(code_point))
            except (ValueError, OverflowError):
                return u"\u3013"  # Unicode for undefined character.
        if found.group(0) == u"\\u":
            return u"_"
        return u"\\"

    if escaped_token.endswith("_"):
        escaped_token = escaped_token[:-1]
    return _UNESCAPE_REGEX.sub(_replacement, escaped_token)
def subtoken_ids_to_str(subtoken_ids, vocabs):
    """
    Convert a list of subtoken(word piece) ids to a native string.
    Refer to SubwordTextEncoder in Tensor2Tensor.

    :param subtoken_ids: The ids of the subtokens, in order.
    :type subtoken_ids: list
    :param vocabs: The mapping from id to subtoken. Ids missing from it
        contribute nothing to the result.
    :type vocabs: dict
    :return: The UTF-8 encoded text.
    """
    subtokens = [vocabs.get(subtoken_id, u"") for subtoken_id in subtoken_ids]

    # Convert a list of subtokens to a list of tokens. `six.text_type` is
    # `unicode` on Python 2, so byte strings are still decoded there while the
    # name also resolves on Python 3.
    concatenated = u"".join(
        t if isinstance(t, six.text_type) else t.decode("utf-8")
        for t in subtokens)
    tokens = []
    for piece in concatenated.split("_"):
        if not piece:
            continue
        # unescape_token strips one trailing "_", so add back the one that
        # split() consumed.
        unescaped = unescape_token(piece + "_")
        if unescaped:
            tokens.append(unescaped)

    # Convert a list of tokens to a unicode string (by inserting spaces between
    # word tokens).
    ret = []
    previous_is_alnum = False
    for token in tokens:
        current_is_alnum = token[0] in _ALPHANUMERIC_CHAR_SET
        if previous_is_alnum and current_is_alnum:
            ret.append(u" ")
        ret.append(token)
        previous_is_alnum = current_is_alnum
    return u"".join(ret).encode("utf-8")


# py_infer in infer.py looks this helper up as `util.subword_ids_to_str`;
# keep that spelling resolvable so the wordpiece branch does not fail with an
# AttributeError.
subword_ids_to_str = subtoken_ids_to_str
./neural_machine_translation/rnn_search
\ No newline at end of file
#!/bin/bash
# This file is only used for continuous evaluation.

export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1

# One variable for the existence check, the copy target and --data_dir, so the
# downloaded data always lands where train.py reads it.
dataset_dir=/root/.cache/paddle/dataset/pascalvoc

if [ ! -d "$dataset_dir" ]; then
    mkdir -p "$dataset_dir"
    ./data/pascalvoc/download.sh
    cp -r ./data/pascalvoc/. "$dataset_dir"
fi

# Single-card run. ":-" substitutes the default without assigning it, so the
# multi-card default below is not shadowed by this one.
cudaid=${object_detection_cudaid:-0}
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --num_passes=2 --data_dir="$dataset_dir/" | python _ce.py

# Multi-card run. object_detection_cudaid_m overrides the card list; an
# explicitly set object_detection_cudaid is still honored as before; with
# neither set the run uses cards 0,1,2,3.
cudaid=${object_detection_cudaid_m:-${object_detection_cudaid:-0,1,2,3}}
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --num_passes=2 --data_dir="$dataset_dir/" | python _ce.py
####this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE: kpi.py is imported from the directory named by the `ceroot`
#### environment variable; it should be shared with the models repository in
#### some way.

# NOTE(review): the meaning of the second and third positional arguments is
# defined by kpi.py, which is not visible here -- presumably a tolerance and a
# skip count; confirm against the KPI classes.
train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True)
test_acc_kpi = AccKpi('test_acc', 0.01, 0, actived=True)
train_speed_kpi = AccKpi('train_speed', 0.2, 0, actived=True)
# NOTE(review): the "_card4" KPIs are presumably reported by the multi-GPU run
# started from .run_ce.sh -- confirm in train.py.
train_cost_card4_kpi = CostKpi('train_cost_card4', 0.02, 0, actived=True)
test_acc_card4_kpi = AccKpi('test_acc_card4', 0.01, 0, actived=True)
train_speed_card4_kpi = AccKpi('train_speed_card4', 0.2, 0, actived=True)

# Every KPI that log_to_ce() can record; it indexes this list by `kpi.name`.
tracking_kpis = [
    train_cost_kpi,
    test_acc_kpi,
    train_speed_kpi,
    train_cost_card4_kpi,
    test_acc_card4_kpi,
    train_speed_card4_kpi,
]
def parse_log(log):
    '''
    Yield the (name, value) pairs of the KPI records found in a training log.

    This method should be implemented by model developers.

    A record is a line made of exactly three tab separated fields whose first
    field is the literal "kpis", for example:
        kpis<TAB>train_cost<TAB>1.0
    The third field is converted to float. All other lines are skipped.
    The fields of every line are echoed to stdout, and matching records are
    echoed a second time with a "-----" prefix.
    '''
    for raw_line in log.split('\n'):
        fs = raw_line.strip().split('\t')
        print(fs)
        if len(fs) != 3 or fs[0] != 'kpis':
            continue
        print("-----%s" % fs)
        _, kpi_name, raw_value = fs
        yield kpi_name, float(raw_value)
def log_to_ce(log):
    """
    Record every KPI value found in `log` on the matching tracked KPI.

    Each (name, value) pair yielded by parse_log() is printed, added to the
    KPI of that name from `tracking_kpis` and persisted right away. A name
    that is not tracked raises KeyError.
    """
    kpi_tracker = dict((kpi.name, kpi) for kpi in tracking_kpis)

    for kpi_name, kpi_value in parse_log(log):
        print(kpi_name, kpi_value)
        tracked_kpi = kpi_tracker[kpi_name]
        tracked_kpi.add_record(kpi_value)
        tracked_kpi.persist()
# Script entry point: the training log is piped in on stdin, echoed between
# two marker lines, and then turned into KPI records.
if __name__ == '__main__':
    log = sys.stdin.read()
    print("*****")
    print(log)
    print("****")
    log_to_ce(log)
...@@ -11,10 +11,10 @@ wget http://images.cocodataset.org/annotations/annotations_trainval2014.zip ...@@ -11,10 +11,10 @@ wget http://images.cocodataset.org/annotations/annotations_trainval2014.zip
wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
# Extract the data. # Extract the data.
echo "Extracting..." echo "Extracting..."
unzip train2014.tar unzip train2014.zip
unzip val2014.tar unzip val2014.zip
unzip train2017.tar unzip train2017.zip
unzip val2017.tar unzip val2017.zip
unzip annotations_trainval2014.tar unzip annotations_trainval2014.zip
unzip annotations_trainval2017.tar unzip annotations_trainval2017.zip
import paddle.v2 as paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
......
...@@ -23,7 +23,7 @@ add_arg('dataset', str, 'pascalvoc', "coco2014, coco2017, and pascalv ...@@ -23,7 +23,7 @@ add_arg('dataset', str, 'pascalvoc', "coco2014, coco2017, and pascalv
add_arg('model_save_dir', str, 'model', "The path to save model.") add_arg('model_save_dir', str, 'model', "The path to save model.")
add_arg('pretrained_model', str, 'pretrained/ssd_mobilenet_v1_coco/', "The init model path.") add_arg('pretrained_model', str, 'pretrained/ssd_mobilenet_v1_coco/', "The init model path.")
add_arg('apply_distort', bool, True, "Whether apply distort.") add_arg('apply_distort', bool, True, "Whether apply distort.")
add_arg('apply_expand', bool, True, "Whether appley expand.") add_arg('apply_expand', bool, True, "Whether apply expand.")
add_arg('nms_threshold', float, 0.45, "NMS threshold.") add_arg('nms_threshold', float, 0.45, "NMS threshold.")
add_arg('ap_version', str, '11point', "integral, 11point.") add_arg('ap_version', str, '11point', "integral, 11point.")
add_arg('resize_h', int, 300, "The resized image height.") add_arg('resize_h', int, 300, "The resized image height.")
...@@ -32,6 +32,8 @@ add_arg('mean_value_B', float, 127.5, "Mean value for B channel which will ...@@ -32,6 +32,8 @@ add_arg('mean_value_B', float, 127.5, "Mean value for B channel which will
add_arg('mean_value_G', float, 127.5, "Mean value for G channel which will be subtracted.") #116.78 add_arg('mean_value_G', float, 127.5, "Mean value for G channel which will be subtracted.") #116.78
add_arg('mean_value_R', float, 127.5, "Mean value for R channel which will be subtracted.") #103.94 add_arg('mean_value_R', float, 127.5, "Mean value for R channel which will be subtracted.") #103.94
add_arg('is_toy', int, 0, "Toy for quick debug, 0 means using all data, while n means using only n sample.") add_arg('is_toy', int, 0, "Toy for quick debug, 0 means using all data, while n means using only n sample.")
add_arg('data_dir', str, 'data/pascalvoc', "data directory")
add_arg('enable_ce', bool, False, "Whether use CE to evaluate the model")
#yapf: enable #yapf: enable
...@@ -44,6 +46,9 @@ def train(args, ...@@ -44,6 +46,9 @@ def train(args,
num_passes, num_passes,
model_save_dir, model_save_dir,
pretrained_model=None): pretrained_model=None):
if args.enable_ce:
fluid.framework.default_startup_program().random_seed = 111
image_shape = [3, data_args.resize_h, data_args.resize_w] image_shape = [3, data_args.resize_h, data_args.resize_w]
if 'coco' in data_args.dataset: if 'coco' in data_args.dataset:
num_classes = 91 num_classes = 91
...@@ -117,8 +122,12 @@ def train(args, ...@@ -117,8 +122,12 @@ def train(args,
train_exe = fluid.ParallelExecutor( train_exe = fluid.ParallelExecutor(
use_cuda=args.use_gpu, loss_name=loss.name) use_cuda=args.use_gpu, loss_name=loss.name)
train_reader = paddle.batch( if not args.enable_ce:
reader.train(data_args, train_file_list), batch_size=batch_size) train_reader = paddle.batch(
reader.train(data_args, train_file_list), batch_size=batch_size)
else:
train_reader = paddle.batch(
reader.train(data_args, train_file_list, False), batch_size=batch_size)
test_reader = paddle.batch( test_reader = paddle.batch(
reader.test(data_args, val_file_list), batch_size=batch_size) reader.test(data_args, val_file_list), batch_size=batch_size)
feeder = fluid.DataFeeder( feeder = fluid.DataFeeder(
...@@ -136,22 +145,29 @@ def train(args, ...@@ -136,22 +145,29 @@ def train(args,
def test(pass_id, best_map): def test(pass_id, best_map):
_, accum_map = map_eval.get_map_var() _, accum_map = map_eval.get_map_var()
map_eval.reset(exe) map_eval.reset(exe)
every_pass_map=[]
for batch_id, data in enumerate(test_reader()): for batch_id, data in enumerate(test_reader()):
test_map, = exe.run(test_program, test_map, = exe.run(test_program,
feed=feeder.feed(data), feed=feeder.feed(data),
fetch_list=[accum_map]) fetch_list=[accum_map])
if batch_id % 20 == 0: if batch_id % 20 == 0:
every_pass_map.append(test_map)
print("Batch {0}, map {1}".format(batch_id, test_map)) print("Batch {0}, map {1}".format(batch_id, test_map))
mean_map = np.mean(every_pass_map)
if test_map[0] > best_map: if test_map[0] > best_map:
best_map = test_map[0] best_map = test_map[0]
save_model('best_model') save_model('best_model')
print("Pass {0}, test map {1}".format(pass_id, test_map)) print("Pass {0}, test map {1}".format(pass_id, test_map))
return best_map return best_map, mean_map
total_time = 0.0
for pass_id in range(num_passes): for pass_id in range(num_passes):
epoch_idx = pass_id + 1
start_time = time.time() start_time = time.time()
prev_start_time = start_time prev_start_time = start_time
end_time = 0 every_pass_loss = []
iter = 0
pass_duration = 0.0
for batch_id, data in enumerate(train_reader()): for batch_id, data in enumerate(train_reader()):
prev_start_time = start_time prev_start_time = start_time
start_time = time.time() start_time = time.time()
...@@ -165,26 +181,40 @@ def train(args, ...@@ -165,26 +181,40 @@ def train(args,
loss_v, = exe.run(fluid.default_main_program(), loss_v, = exe.run(fluid.default_main_program(),
feed=feeder.feed(data), feed=feeder.feed(data),
fetch_list=[loss]) fetch_list=[loss])
end_time = time.time()
loss_v = np.mean(np.array(loss_v)) loss_v = np.mean(np.array(loss_v))
every_pass_loss.append(loss_v)
if batch_id % 20 == 0: if batch_id % 20 == 0:
print("Pass {0}, batch {1}, loss {2}, time {3}".format( print("Pass {0}, batch {1}, loss {2}, time {3}".format(
pass_id, batch_id, loss_v, start_time - prev_start_time)) pass_id, batch_id, loss_v, start_time - prev_start_time))
best_map = test(pass_id, best_map)
end_time = time.time()
best_map, mean_map = test(pass_id, best_map)
if args.enable_ce and pass_id == 1:
total_time += end_time - start_time
train_avg_loss = np.mean(every_pass_loss)
if devices_num == 1:
print ("kpis train_cost %s" % train_avg_loss)
print ("kpis test_acc %s" % mean_map)
print ("kpis train_speed %s" % (total_time / epoch_idx))
else:
print ("kpis train_cost_card%s %s" % (devices_num, train_avg_loss))
print ("kpis test_acc_card%s %s" % (devices_num, mean_map))
print ("kpis train_speed_card%s %f" % (devices_num, total_time / epoch_idx))
if pass_id % 10 == 0 or pass_id == num_passes - 1: if pass_id % 10 == 0 or pass_id == num_passes - 1:
save_model(str(pass_id)) save_model(str(pass_id))
print("Best test map {0}".format(best_map)) print("Best test map {0}".format(best_map))
if __name__ == '__main__': if __name__ == '__main__':
args = parser.parse_args() args = parser.parse_args()
print_arguments(args) print_arguments(args)
data_dir = 'data/pascalvoc' data_dir = args.data_dir
train_file_list = 'trainval.txt'
val_file_list = 'test.txt'
label_file = 'label_list' label_file = 'label_list'
model_save_dir = args.model_save_dir model_save_dir = args.model_save_dir
train_file_list = 'trainval.txt'
val_file_list = 'test.txt'
if 'coco' in args.dataset: if 'coco' in args.dataset:
data_dir = 'data/coco' data_dir = 'data/coco'
if '2014' in args.dataset: if '2014' in args.dataset:
......
...@@ -113,6 +113,10 @@ data/test_images/00003.jpg ...@@ -113,6 +113,10 @@ data/test_images/00003.jpg
``` ```
env CUDA_VISIBLE_DEVICES=0 python ctc_train.py env CUDA_VISIBLE_DEVICES=0 python ctc_train.py
``` ```
使用默认数据在CPU上训练:
```
env OMP_NUM_THREADS=<num_of_physical_cores> python ctc_train.py --use_gpu False --parallel=False
```
使用默认数据在GPU多卡上训练: 使用默认数据在GPU多卡上训练:
......
...@@ -12,7 +12,8 @@ def conv_bn_pool(input, ...@@ -12,7 +12,8 @@ def conv_bn_pool(input,
bias=None, bias=None,
param_0=None, param_0=None,
is_test=False, is_test=False,
pooling=True): pooling=True,
use_cudnn=False):
tmp = input tmp = input
for i in xrange(group): for i in xrange(group):
tmp = fluid.layers.conv2d( tmp = fluid.layers.conv2d(
...@@ -22,7 +23,7 @@ def conv_bn_pool(input, ...@@ -22,7 +23,7 @@ def conv_bn_pool(input,
padding=1, padding=1,
param_attr=param if param_0 is None else param_0, param_attr=param if param_0 is None else param_0,
act=None, # LinearActivation act=None, # LinearActivation
use_cudnn=True) use_cudnn=use_cudnn)
tmp = fluid.layers.batch_norm( tmp = fluid.layers.batch_norm(
input=tmp, input=tmp,
act=act, act=act,
...@@ -35,13 +36,17 @@ def conv_bn_pool(input, ...@@ -35,13 +36,17 @@ def conv_bn_pool(input,
pool_size=2, pool_size=2,
pool_type='max', pool_type='max',
pool_stride=2, pool_stride=2,
use_cudnn=True, use_cudnn=use_cudnn,
ceil_mode=True) ceil_mode=True)
return tmp return tmp
def ocr_convs(input, regularizer=None, gradient_clip=None, is_test=False): def ocr_convs(input,
regularizer=None,
gradient_clip=None,
is_test=False,
use_cudnn=False):
b = fluid.ParamAttr( b = fluid.ParamAttr(
regularizer=regularizer, regularizer=regularizer,
gradient_clip=gradient_clip, gradient_clip=gradient_clip,
...@@ -56,12 +61,36 @@ def ocr_convs(input, regularizer=None, gradient_clip=None, is_test=False): ...@@ -56,12 +61,36 @@ def ocr_convs(input, regularizer=None, gradient_clip=None, is_test=False):
initializer=fluid.initializer.Normal(0.0, 0.01)) initializer=fluid.initializer.Normal(0.0, 0.01))
tmp = input tmp = input
tmp = conv_bn_pool( tmp = conv_bn_pool(
tmp, 2, [16, 16], param=w1, bias=b, param_0=w0, is_test=is_test) tmp,
2, [16, 16],
param=w1,
bias=b,
param_0=w0,
is_test=is_test,
use_cudnn=use_cudnn)
tmp = conv_bn_pool(tmp, 2, [32, 32], param=w1, bias=b, is_test=is_test)
tmp = conv_bn_pool(tmp, 2, [64, 64], param=w1, bias=b, is_test=is_test)
tmp = conv_bn_pool( tmp = conv_bn_pool(
tmp, 2, [128, 128], param=w1, bias=b, is_test=is_test, pooling=False) tmp,
2, [32, 32],
param=w1,
bias=b,
is_test=is_test,
use_cudnn=use_cudnn)
tmp = conv_bn_pool(
tmp,
2, [64, 64],
param=w1,
bias=b,
is_test=is_test,
use_cudnn=use_cudnn)
tmp = conv_bn_pool(
tmp,
2, [128, 128],
param=w1,
bias=b,
is_test=is_test,
pooling=False,
use_cudnn=use_cudnn)
return tmp return tmp
...@@ -70,12 +99,14 @@ def encoder_net(images, ...@@ -70,12 +99,14 @@ def encoder_net(images,
rnn_hidden_size=200, rnn_hidden_size=200,
regularizer=None, regularizer=None,
gradient_clip=None, gradient_clip=None,
is_test=False): is_test=False,
use_cudnn=False):
conv_features = ocr_convs( conv_features = ocr_convs(
images, images,
regularizer=regularizer, regularizer=regularizer,
gradient_clip=gradient_clip, gradient_clip=gradient_clip,
is_test=is_test) is_test=is_test,
use_cudnn=use_cudnn)
sliced_feature = fluid.layers.im2sequence( sliced_feature = fluid.layers.im2sequence(
input=conv_features, input=conv_features,
stride=[1, 1], stride=[1, 1],
...@@ -142,7 +173,11 @@ def ctc_train_net(images, label, args, num_classes): ...@@ -142,7 +173,11 @@ def ctc_train_net(images, label, args, num_classes):
learning_rate_decay = None learning_rate_decay = None
regularizer = fluid.regularizer.L2Decay(L2_RATE) regularizer = fluid.regularizer.L2Decay(L2_RATE)
fc_out = encoder_net(images, num_classes, regularizer=regularizer) fc_out = encoder_net(
images,
num_classes,
regularizer=regularizer,
use_cudnn=True if args.use_gpu else False)
cost = fluid.layers.warpctc( cost = fluid.layers.warpctc(
input=fc_out, label=label, blank=num_classes, norm_by_times=True) input=fc_out, label=label, blank=num_classes, norm_by_times=True)
sum_cost = fluid.layers.reduce_sum(cost) sum_cost = fluid.layers.reduce_sum(cost)
...@@ -166,19 +201,18 @@ def ctc_train_net(images, label, args, num_classes): ...@@ -166,19 +201,18 @@ def ctc_train_net(images, label, args, num_classes):
if args.average_window > 0: if args.average_window > 0:
model_average = fluid.optimizer.ModelAverage( model_average = fluid.optimizer.ModelAverage(
args.average_window, args.average_window,
params_grads,
min_average_window=args.min_average_window, min_average_window=args.min_average_window,
max_average_window=args.max_average_window) max_average_window=args.max_average_window)
return sum_cost, error_evaluator, inference_program, model_average return sum_cost, error_evaluator, inference_program, model_average
def ctc_infer(images, num_classes): def ctc_infer(images, num_classes, use_cudnn):
fc_out = encoder_net(images, num_classes, is_test=True) fc_out = encoder_net(images, num_classes, is_test=True, use_cudnn=use_cudnn)
return fluid.layers.ctc_greedy_decoder(input=fc_out, blank=num_classes) return fluid.layers.ctc_greedy_decoder(input=fc_out, blank=num_classes)
def ctc_eval(images, label, num_classes): def ctc_eval(images, label, num_classes, use_cudnn):
fc_out = encoder_net(images, num_classes, is_test=True) fc_out = encoder_net(images, num_classes, is_test=True, use_cudnn=use_cudnn)
decoded_out = fluid.layers.ctc_greedy_decoder( decoded_out = fluid.layers.ctc_greedy_decoder(
input=fc_out, blank=num_classes) input=fc_out, blank=num_classes)
......
...@@ -25,7 +25,7 @@ class DataGenerator(object): ...@@ -25,7 +25,7 @@ class DataGenerator(object):
def __init__(self): def __init__(self):
pass pass
def train_reader(self, img_root_dir, img_label_list, batchsize): def train_reader(self, img_root_dir, img_label_list, batchsize, cycle):
''' '''
Reader interface for training. Reader interface for training.
...@@ -35,6 +35,10 @@ class DataGenerator(object): ...@@ -35,6 +35,10 @@ class DataGenerator(object):
:param img_label_list: The path of the <image_name, label> file for training. :param img_label_list: The path of the <image_name, label> file for training.
:type img_label_list: str :type img_label_list: str
:param cycle: If number of iterations is greater than dataset_size / batch_size
it reiterates dataset over as many times as necessary.
:type cycle: bool
''' '''
img_label_lines = [] img_label_lines = []
...@@ -65,24 +69,29 @@ class DataGenerator(object): ...@@ -65,24 +69,29 @@ class DataGenerator(object):
def reader(): def reader():
sizes = len(img_label_lines) / batchsize sizes = len(img_label_lines) / batchsize
for i in range(sizes): if sizes == 0:
result = [] raise ValueError('Batch size is bigger than the dataset size.')
sz = [0, 0] while True:
for j in range(batchsize): for i in range(sizes):
line = img_label_lines[i * batchsize + j] result = []
# h, w, img_name, labels sz = [0, 0]
items = line.split(' ') for j in range(batchsize):
line = img_label_lines[i * batchsize + j]
label = [int(c) for c in items[-1].split(',')] # h, w, img_name, labels
img = Image.open(os.path.join(img_root_dir, items[ items = line.split(' ')
2])).convert('L') #zhuanhuidu
if j == 0: label = [int(c) for c in items[-1].split(',')]
sz = img.size img = Image.open(os.path.join(img_root_dir, items[
img = img.resize((sz[0], sz[1])) 2])).convert('L') #zhuanhuidu
img = np.array(img) - 127.5 if j == 0:
img = img[np.newaxis, ...] sz = img.size
result.append([img, label]) img = img.resize((sz[0], sz[1]))
yield result img = np.array(img) - 127.5
img = img[np.newaxis, ...]
result.append([img, label])
yield result
if not cycle:
break
return reader return reader
...@@ -111,7 +120,7 @@ class DataGenerator(object): ...@@ -111,7 +120,7 @@ class DataGenerator(object):
return reader return reader
def infer_reader(self, img_root_dir=None, img_label_list=None): def infer_reader(self, img_root_dir=None, img_label_list=None, cycle=False):
'''A reader interface for inference. '''A reader interface for inference.
:param img_root_dir: The root path of the images for training. :param img_root_dir: The root path of the images for training.
...@@ -122,11 +131,15 @@ class DataGenerator(object): ...@@ -122,11 +131,15 @@ class DataGenerator(object):
was None. If img_label_list was set to None, it will read image path was None. If img_label_list was set to None, it will read image path
from stdin. from stdin.
:type img_root_dir: str :type img_root_dir: str
:param cycle: If number of iterations is greater than dataset_size /
batch_size it reiterates dataset over as many times as necessary.
:type cycle: bool
''' '''
def reader(): def reader():
if img_label_list is not None: def yield_img_and_label(lines):
for line in open(img_label_list): for line in lines:
if img_root_dir is not None: if img_root_dir is not None:
# h, w, img_name, labels # h, w, img_name, labels
img_name = line.split(' ')[2] img_name = line.split(' ')[2]
...@@ -138,6 +151,16 @@ class DataGenerator(object): ...@@ -138,6 +151,16 @@ class DataGenerator(object):
img = img[np.newaxis, ...] img = img[np.newaxis, ...]
label = [int(c) for c in line.split(' ')[3].split(',')] label = [int(c) for c in line.split(' ')[3].split(',')]
yield img, label yield img, label
if img_label_list is not None:
lines = []
with open(img_label_list) as f:
lines = f.readlines()
for img, label in yield_img_and_label(lines):
yield img, label
while cycle:
for img, label in yield_img_and_label(lines):
yield img, label
else: else:
while True: while True:
img_path = raw_input("Please input the path of image: ") img_path = raw_input("Please input the path of image: ")
...@@ -161,14 +184,15 @@ def data_shape(): ...@@ -161,14 +184,15 @@ def data_shape():
return DATA_SHAPE return DATA_SHAPE
def train(batch_size, train_images_dir=None, train_list_file=None): def train(batch_size, train_images_dir=None, train_list_file=None, cycle=False):
generator = DataGenerator() generator = DataGenerator()
if train_images_dir is None: if train_images_dir is None:
data_dir = download_data() data_dir = download_data()
train_images_dir = path.join(data_dir, TRAIN_DATA_DIR_NAME) train_images_dir = path.join(data_dir, TRAIN_DATA_DIR_NAME)
if train_list_file is None: if train_list_file is None:
train_list_file = path.join(data_dir, TRAIN_LIST_FILE_NAME) train_list_file = path.join(data_dir, TRAIN_LIST_FILE_NAME)
return generator.train_reader(train_images_dir, train_list_file, batch_size) return generator.train_reader(train_images_dir, train_list_file, batch_size,
cycle)
def test(batch_size=1, test_images_dir=None, test_list_file=None): def test(batch_size=1, test_images_dir=None, test_list_file=None):
...@@ -182,10 +206,14 @@ def test(batch_size=1, test_images_dir=None, test_list_file=None): ...@@ -182,10 +206,14 @@ def test(batch_size=1, test_images_dir=None, test_list_file=None):
generator.test_reader(test_images_dir, test_list_file), batch_size) generator.test_reader(test_images_dir, test_list_file), batch_size)
def inference(infer_images_dir=None, infer_list_file=None): def inference(batch_size=1,
infer_images_dir=None,
infer_list_file=None,
cycle=False):
generator = DataGenerator() generator = DataGenerator()
return paddle.batch( return paddle.batch(
generator.infer_reader(infer_images_dir, infer_list_file), 1) generator.infer_reader(infer_images_dir, infer_list_file, cycle),
batch_size)
def download_data(): def download_data():
......
"""Trainer for OCR CTC model.""" """Trainer for OCR CTC model."""
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
from crnn_ctc_model import ctc_train_net from crnn_ctc_model import ctc_train_net
import ctc_reader import ctc_reader
...@@ -14,7 +15,7 @@ parser = argparse.ArgumentParser(description=__doc__) ...@@ -14,7 +15,7 @@ parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser) add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable # yapf: disable
add_arg('batch_size', int, 32, "Minibatch size.") add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('total_step', int, 720000, "Number of training iterations.") add_arg('total_step', int, 720000, "The number of iterations. Zero or less means whole training set. More than 0 means the training set might be looped until # of iterations is reached.")
add_arg('log_period', int, 1000, "Log period.") add_arg('log_period', int, 1000, "Log period.")
add_arg('save_model_period', int, 15000, "Save model period. '-1' means never saving the model.") add_arg('save_model_period', int, 15000, "Save model period. '-1' means never saving the model.")
add_arg('eval_period', int, 15000, "Evaluate period. '-1' means never evaluating the model.") add_arg('eval_period', int, 15000, "Evaluate period. '-1' means never evaluating the model.")
...@@ -25,6 +26,9 @@ add_arg('min_average_window',int, 10000, "Min average window.") ...@@ -25,6 +26,9 @@ add_arg('min_average_window',int, 10000, "Min average window.")
add_arg('max_average_window',int, 12500, "Max average window. It is proposed to be set as the number of minibatch in a pass.") add_arg('max_average_window',int, 12500, "Max average window. It is proposed to be set as the number of minibatch in a pass.")
add_arg('average_window', float, 0.15, "Average window.") add_arg('average_window', float, 0.15, "Average window.")
add_arg('parallel', bool, False, "Whether use parallel training.") add_arg('parallel', bool, False, "Whether use parallel training.")
add_arg('profile', bool, False, "Whether to use profiling.")
add_arg('skip_batch_num', int, 0, "The number of first minibatches to skip as warm-up for better performance test.")
add_arg('skip_test', bool, False, "Whether to skip test phase.")
# yapf: enable # yapf: enable
...@@ -49,7 +53,8 @@ def train(args, data_reader=ctc_reader): ...@@ -49,7 +53,8 @@ def train(args, data_reader=ctc_reader):
train_reader = data_reader.train( train_reader = data_reader.train(
args.batch_size, args.batch_size,
train_images_dir=train_images, train_images_dir=train_images,
train_list_file=train_list) train_list_file=train_list,
cycle=args.total_step > 0)
test_reader = data_reader.test( test_reader = data_reader.test(
test_images_dir=test_images, test_list_file=test_list) test_images_dir=test_images, test_list_file=test_list)
...@@ -74,7 +79,7 @@ def train(args, data_reader=ctc_reader): ...@@ -74,7 +79,7 @@ def train(args, data_reader=ctc_reader):
error_evaluator.reset(exe) error_evaluator.reset(exe)
if args.parallel: if args.parallel:
train_exe = fluid.ParallelExecutor( train_exe = fluid.ParallelExecutor(
use_cuda=True, loss_name=sum_cost.name) use_cuda=True if args.use_gpu else False, loss_name=sum_cost.name)
fetch_vars = [sum_cost] + error_evaluator.metrics fetch_vars = [sum_cost] + error_evaluator.metrics
...@@ -85,8 +90,8 @@ def train(args, data_reader=ctc_reader): ...@@ -85,8 +90,8 @@ def train(args, data_reader=ctc_reader):
feed=get_feeder_data(data, place)) feed=get_feeder_data(data, place))
results = [np.array(result).sum() for result in results] results = [np.array(result).sum() for result in results]
else: else:
results = exe.run(feed=get_feeder_data(data, place), results = train_exe.run(feed=get_feeder_data(data, place),
fetch_list=fetch_vars) fetch_list=fetch_vars)
results = [result[0] for result in results] results = [result[0] for result in results]
return results return results
...@@ -109,17 +114,29 @@ def train(args, data_reader=ctc_reader): ...@@ -109,17 +114,29 @@ def train(args, data_reader=ctc_reader):
print "Saved model to: %s/%s." % (args.save_model_dir, filename) print "Saved model to: %s/%s." % (args.save_model_dir, filename)
iter_num = 0 iter_num = 0
while True: stop = False
while not stop:
total_loss = 0.0 total_loss = 0.0
total_seq_error = 0.0 total_seq_error = 0.0
batch_times = []
# train a pass # train a pass
for data in train_reader(): for data in train_reader():
iter_num += 1 if args.total_step > 0 and iter_num == args.total_step + args.skip_batch_num:
if iter_num > args.total_step: stop = True
return break
if iter_num < args.skip_batch_num:
print("Warm-up iteration")
if iter_num == args.skip_batch_num:
profiler.reset_profiler()
start = time.time()
results = train_one_batch(data) results = train_one_batch(data)
batch_time = time.time() - start
fps = args.batch_size / batch_time
batch_times.append(batch_time)
total_loss += results[0] total_loss += results[0]
total_seq_error += results[2] total_seq_error += results[2]
iter_num += 1
# training log # training log
if iter_num % args.log_period == 0: if iter_num % args.log_period == 0:
print "\nTime: %s; Iter[%d]; Avg Warp-CTC loss: %.3f; Avg seq err: %.3f" % ( print "\nTime: %s; Iter[%d]; Avg Warp-CTC loss: %.3f; Avg seq err: %.3f" % (
...@@ -131,7 +148,7 @@ def train(args, data_reader=ctc_reader): ...@@ -131,7 +148,7 @@ def train(args, data_reader=ctc_reader):
total_seq_error = 0.0 total_seq_error = 0.0
# evaluate # evaluate
if iter_num % args.eval_period == 0: if not args.skip_test and iter_num % args.eval_period == 0:
if model_average: if model_average:
with model_average.apply(exe): with model_average.apply(exe):
test(iter_num) test(iter_num)
...@@ -145,12 +162,35 @@ def train(args, data_reader=ctc_reader): ...@@ -145,12 +162,35 @@ def train(args, data_reader=ctc_reader):
save_model(args, exe, iter_num) save_model(args, exe, iter_num)
else: else:
save_model(args, exe, iter_num) save_model(args, exe, iter_num)
# Postprocess benchmark data
latencies = batch_times[args.skip_batch_num:]
latency_avg = np.average(latencies)
latency_pc99 = np.percentile(latencies, 99)
fpses = np.divide(args.batch_size, latencies)
fps_avg = np.average(fpses)
fps_pc99 = np.percentile(fpses, 1)
# Benchmark output
print('\nTotal examples (incl. warm-up): %d' %
(iter_num * args.batch_size))
print('average latency: %.5f s, 99pc latency: %.5f s' % (latency_avg,
latency_pc99))
print('average fps: %.5f, fps for 99pc latency: %.5f' % (fps_avg,
fps_pc99))
def main(): def main():
args = parser.parse_args() args = parser.parse_args()
print_arguments(args) print_arguments(args)
train(args, data_reader=ctc_reader) if args.profile:
if args.use_gpu:
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
train(args, data_reader=ctc_reader)
else:
with profiler.profiler("CPU", sorted_key='total') as cpuprof:
train(args, data_reader=ctc_reader)
else:
train(args, data_reader=ctc_reader)
if __name__ == "__main__": if __name__ == "__main__":
......
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
from crnn_ctc_model import ctc_infer from crnn_ctc_model import ctc_infer
import numpy as np import numpy as np
...@@ -7,6 +8,7 @@ import ctc_reader ...@@ -7,6 +8,7 @@ import ctc_reader
import argparse import argparse
import functools import functools
import os import os
import time
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser) add_arg = functools.partial(add_arguments, argparser=parser)
...@@ -16,6 +18,10 @@ add_arg('input_images_dir', str, None, "The directory of images.") ...@@ -16,6 +18,10 @@ add_arg('input_images_dir', str, None, "The directory of images.")
add_arg('input_images_list', str, None, "The list file of images.") add_arg('input_images_list', str, None, "The list file of images.")
add_arg('dict', str, None, "The dictionary. The result of inference will be index sequence if the dictionary was None.") add_arg('dict', str, None, "The dictionary. The result of inference will be index sequence if the dictionary was None.")
add_arg('use_gpu', bool, True, "Whether use GPU to infer.") add_arg('use_gpu', bool, True, "Whether use GPU to infer.")
add_arg('iterations', int, 0, "The number of iterations. Zero or less means whole test set. More than 0 means the test set might be looped until # of iterations is reached.")
add_arg('profile', bool, False, "Whether to use profiling.")
add_arg('skip_batch_num', int, 0, "The number of first minibatches to skip as warm-up for better performance test.")
add_arg('batch_size', int, 1, "The minibatch size.")
# yapf: enable # yapf: enable
...@@ -25,11 +31,14 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader): ...@@ -25,11 +31,14 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader):
data_shape = data_reader.data_shape() data_shape = data_reader.data_shape()
# define network # define network
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
sequence = infer(images, num_classes) sequence = infer(
images, num_classes, use_cudnn=True if args.use_gpu else False)
# data reader # data reader
infer_reader = data_reader.inference( infer_reader = data_reader.inference(
batch_size=args.batch_size,
infer_images_dir=args.input_images_dir, infer_images_dir=args.input_images_dir,
infer_list_file=args.input_images_list) infer_list_file=args.input_images_list,
cycle=True if args.iterations > 0 else False)
# prepare environment # prepare environment
place = fluid.CPUPlace() place = fluid.CPUPlace()
if args.use_gpu: if args.use_gpu:
...@@ -56,23 +65,67 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader): ...@@ -56,23 +65,67 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader):
fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name) fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
print "Init model from: %s." % args.model_path print "Init model from: %s." % args.model_path
batch_times = []
iters = 0
for data in infer_reader(): for data in infer_reader():
if args.iterations > 0 and iters == args.iterations + args.skip_batch_num:
break
if iters < args.skip_batch_num:
print("Warm-up itaration")
if iters == args.skip_batch_num:
profiler.reset_profiler()
start = time.time()
result = exe.run(fluid.default_main_program(), result = exe.run(fluid.default_main_program(),
feed=get_feeder_data( feed=get_feeder_data(
data, place, need_label=False), data, place, need_label=False),
fetch_list=[sequence], fetch_list=[sequence],
return_numpy=False) return_numpy=False)
batch_time = time.time() - start
fps = args.batch_size / batch_time
batch_times.append(batch_time)
indexes = np.array(result[0]).flatten() indexes = np.array(result[0]).flatten()
if dict_map is not None: if dict_map is not None:
print "result: %s" % ([dict_map[index] for index in indexes], ) print "Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
iters,
batch_time,
fps,
[dict_map[index] for index in indexes], )
else: else:
print "result: %s" % (indexes, ) print "Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
iters,
batch_time,
fps,
indexes, )
iters += 1
latencies = batch_times[args.skip_batch_num:]
latency_avg = np.average(latencies)
latency_pc99 = np.percentile(latencies, 99)
fpses = np.divide(args.batch_size, latencies)
fps_avg = np.average(fpses)
fps_pc99 = np.percentile(fpses, 1)
# Benchmark output
print('\nTotal examples (incl. warm-up): %d' % (iters * args.batch_size))
print('average latency: %.5f s, 99pc latency: %.5f s' % (latency_avg,
latency_pc99))
print('average fps: %.5f, fps for 99pc latency: %.5f' % (fps_avg, fps_pc99))
def main(): def main():
args = parser.parse_args() args = parser.parse_args()
print_arguments(args) print_arguments(args)
inference(args, data_reader=ctc_reader) if args.profile:
if args.use_gpu:
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
inference(args, data_reader=ctc_reader)
else:
with profiler.profiler("CPU", sorted_key='total') as cpuprof:
inference(args, data_reader=ctc_reader)
else:
inference(args, data_reader=ctc_reader)
if __name__ == "__main__": if __name__ == "__main__":
......
## Introduction
The scripts in this folder are example commands for starting training
and inference of a model; customise them to suit your own setup.
## Running with MKL-DNN
In order to run training or inference using the MKL-DNN library, please set
the `FLAGS_use_mkldnn=1` environment variable.
## Prerequisites
There are no special requirements for running training and inference.
## Training
To run training on *CPU*, please execute:
```sh
source train.sh CPU
```
To run training on *CPU* with MKL-DNN, please execute:
```sh
source train.sh MKLDNN
```
To run training on *GPU*, please execute:
```sh
source train.sh GPU
```
## Inference
To perform inference on the trained model using *CPU*, please run:
```sh
source infer.sh CPU
```
To perform inference on the trained model using *CPU* with MKL-DNN, please run:
```sh
source infer.sh MKLDNN
```
To perform inference on the trained model using *GPU*, please run:
```sh
source infer.sh GPU
```
#!/bin/bash
# Example launcher for CTC OCR inference.
#
# Usage: source infer.sh {CPU|GPU|MKLDNN}
#
# Selects the device and model directory from the mode argument, sets the
# OpenMP/KMP thread-affinity environment (without overriding values the caller
# already exported), then runs ../infer.py on the cached test set.

export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1

mode=$1 # one of: GPU, CPU, MKLDNN (case-sensitive)

if [ "$mode" = "CPU" ]; then
    use_gpu="False"
    model_path="cpu_model"
elif [ "$mode" = "GPU" ]; then
    use_gpu="True"
    model_path="gpu_model"
elif [ "$mode" = "MKLDNN" ]; then
    use_gpu="False"
    model_path="mkldnn_model"
    export FLAGS_use_mkldnn=1
else
    echo "Invalid mode provided. Please use one of {GPU, CPU, MKLDNN}"
    # The script is meant to be sourced: `return` stops it without closing the
    # caller's shell. When executed directly `return` fails, so fall back to
    # `exit`.
    return 1 2>/dev/null || exit 1
fi

# Threads per core as reported by lscpu; 1 means hyper-threading is off.
ht=$(lscpu | grep "per core" | awk -F':' '{print $2}' | xargs)
# Default to 0 when lscpu gave no answer so the test below stays well-formed
# and falls through to the "HT is ON" settings, as an empty value did before.
if [ "${ht:-0}" -eq 1 ]; then # HT is OFF
    if [ -z "$KMP_AFFINITY" ]; then
        export KMP_AFFINITY="granularity=fine,compact,0,0"
    fi
    if [ -z "$OMP_DYNAMIC" ]; then
        export OMP_DYNAMIC="FALSE"
    fi
else # HT is ON
    if [ -z "$KMP_AFFINITY" ]; then
        export KMP_AFFINITY="granularity=fine,compact,1,0"
    fi
fi

python ../infer.py \
    --model_path "$model_path/model_00001" \
    --input_images_list ~/.cache/paddle/dataset/ctc_data/data/test.list \
    --input_images_dir ~/.cache/paddle/dataset/ctc_data/data/test_images \
    --use_gpu "$use_gpu" \
    --batch_size 32 \
    --iterations 5 \
    --skip_batch_num 2
#!/bin/bash
# Run ../ctc_train.py for a single step and save the resulting model.
#
# Usage: source train.sh {CPU|GPU|MKLDNN}
#
# The mode selects the --use_gpu and --parallel values and the directory the
# model is saved to; MKLDNN additionally exports FLAGS_use_mkldnn=1.
# NOTE: when this file is sourced, every variable exported below stays set in
# the calling shell after the script finishes.

export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1

batch_size=32
# Value of the first lscpu line that contains "CPU(s)".
core_num=$(lscpu | grep -m1 "CPU(s)" | awk -F':' '{print $2}' | xargs)

mode=$1 # one of: CPU, GPU, MKLDNN (upper case)

if [ "$mode" = "CPU" ]; then
    # Skip the comparison when the core count could not be read, rather than
    # letting `[` fail on an empty operand.
    if [ -n "$core_num" ] && [ "$core_num" -gt "$batch_size" ]; then
        echo "Batch size should be greater or equal to the number of
available cores, when parallel mode is set to True."
    fi
    use_gpu="False"
    save_model_dir="cpu_model"
    parallel="True"
elif [ "$mode" = "GPU" ]; then
    use_gpu="True"
    save_model_dir="gpu_model"
    parallel="True"
elif [ "$mode" = "MKLDNN" ]; then
    # NOTE(review): this warning mentions parallel mode being True, but this
    # branch sets parallel="False" -- confirm whether it is still wanted.
    if [ -n "$core_num" ] && [ "$core_num" -gt "$batch_size" ]; then
        echo "Batch size should be greater or equal to the number of
available cores, when parallel mode is set to True."
    fi
    use_gpu="False"
    save_model_dir="mkldnn_model"
    parallel="False"
    export FLAGS_use_mkldnn=1
else
    echo "Invalid mode provided. Please use one of {GPU, CPU, MKLDNN}"
    # `return` ends a sourced script without closing the caller's shell;
    # it fails when the file is executed directly, so fall back to `exit`.
    return 1 2>/dev/null || exit 1
fi

# Threads per core as reported by lscpu; 1 means hyper-threading is off.
ht=$(lscpu | grep "per core" | awk -F':' '{print $2}' | xargs)

# String comparison so that an empty or non-numeric value (e.g. lscpu missing)
# quietly takes the "HT is ON" branch instead of raising a test error.
if [ "$ht" = "1" ]; then # HT is OFF
    if [ -z "$KMP_AFFINITY" ]; then
        export KMP_AFFINITY="granularity=fine,compact,0,0"
    fi
    if [ -z "$OMP_DYNAMIC" ]; then
        export OMP_DYNAMIC="FALSE"
    fi
else # HT is ON
    if [ -z "$KMP_AFFINITY" ]; then
        export KMP_AFFINITY="granularity=fine,compact,1,0"
    fi
fi

python ../ctc_train.py \
    --use_gpu "$use_gpu" \
    --parallel "$parallel" \
    --batch_size "$batch_size" \
    --save_model_period 1 \
    --total_step 1 \
    --save_model_dir "$save_model_dir"
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册