Commit 737b69f2 authored by Yu Yang

Merge branch 'develop' of https://github.com/PaddlePaddle/models into speed_up_transformer_python_reader
.DS_Store
*.pyc
.*~
fluid/neural_machine_translation/transformer/deps
fluid/neural_machine_translation/transformer/train.data
fluid/neural_machine_translation/transformer/train.pkl
fluid/neural_machine_translation/transformer/train.sh
fluid/neural_machine_translation/transformer/train.tok.clean.bpe.32000.en-de
fluid/neural_machine_translation/transformer/vocab.bpe.32000.refined
...@@ -9,3 +9,4 @@ log*
output*
pred
eval_tools
box*
...@@ -24,15 +24,10 @@ def calc_diff(f1, f2):
#print d2.shape
#print d1[0, 0, 0:10, 0:10]
#print d2[0, 0, 0:10, 0:10]
#d1 = d1[:, :, 1:-2, 1:-2]
#d2 = d2[:, :, 1:-2, 1:-2]
d1 = d1.flatten()
d2 = d2.flatten()
#print d1[:10]
#print d2[:10]
d1_num = reduce(lambda x, y: x * y, d1.shape)
d2_num = reduce(lambda x, y: x * y, d2.shape)
if d1_num != d2_num:
...@@ -41,7 +36,11 @@ def calc_diff(f1, f2):
assert (d1_num == d2_num), "their shape is not consistent"
try:
mask = np.abs(d1) >= np.abs(d2)
mask = mask.astype('int32')
df = np.abs(d1 - d2)
df = df / (1.0e-10 + np.abs(d1) * mask + np.abs(d2) * (1 - mask))
max_df = np.max(df)
sq_df = np.mean(df * df)
return max_df, sq_df
......
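For reference, a minimal standalone sketch (not part of the diff) of the relative-difference metric this hunk introduces: each elementwise difference is normalized by the larger of the two magnitudes, with a small epsilon to avoid division by zero.

```python
import numpy as np

def relative_diff(d1, d2):
    # Sketch of the metric above: |d1 - d2| normalized elementwise by the
    # larger magnitude; returns the max and the mean-squared relative diff.
    d1, d2 = d1.flatten(), d2.flatten()
    assert d1.size == d2.size, "their shape is not consistent"
    mask = (np.abs(d1) >= np.abs(d2)).astype('int32')
    df = np.abs(d1 - d2)
    df = df / (1.0e-10 + np.abs(d1) * mask + np.abs(d2) * (1 - mask))
    return np.max(df), np.mean(df * df)
```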
...@@ -39,6 +39,7 @@ LAYER_DESCRIPTORS = {
'Pooling': shape_pool,
'Power': shape_identity,
'ReLU': shape_identity,
'PReLU': shape_identity,
'Scale': shape_identity,
'Sigmoid': shape_identity,
'SigmoidCrossEntropyLoss': shape_scalar,
......
...@@ -240,10 +240,16 @@ class Network(object):
@layer
def relu(self, input, name):
fluid = import_fluid()
output = fluid.layers.relu( output = fluid.layers.relu(input)
name=self.get_unique_output_name(name, 'relu'), x=input)
return output
@layer
def prelu(self, input, channel_shared, name):
#fluid = import_fluid()
#output = fluid.layers.relu(input)
#return output
raise NotImplementedError('prelu not implemented')
def pool(self, pool_type, input, k_h, k_w, s_h, s_w, ceil_mode, padding,
name):
# Get the number of channels in the input
...@@ -382,7 +388,8 @@ class Network(object):
name,
scale_offset=True,
eps=1e-5,
relu=False): relu=False,
relu_negative_slope=0.0):
# NOTE: Currently, only inference is supported
fluid = import_fluid()
prefix = name + '_'
...@@ -392,6 +399,15 @@ class Network(object):
name=prefix + 'offset')
mean_name = prefix + 'mean'
variance_name = prefix + 'variance'
leaky_relu = False
act = 'relu'
if relu is False:
act = None
elif relu_negative_slope != 0.0:
leaky_relu = True
act = None
output = fluid.layers.batch_norm(
name=self.get_unique_output_name(name, 'batch_norm'),
input=input,
...@@ -401,7 +417,10 @@ class Network(object):
moving_mean_name=mean_name,
moving_variance_name=variance_name,
epsilon=eps,
act='relu' if relu is True else None) act=act)
if leaky_relu:
output = fluid.layers.leaky_relu(output, alpha=relu_negative_slope)
return output
......
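To summarize the new control flow, here is a small illustrative helper (not repository code) restating how the activation is chosen in the hunk above: a plain ReLU stays fused into `batch_norm` via `act='relu'`, while a non-zero negative slope disables the fused activation and applies `fluid.layers.leaky_relu` afterwards.

```python
def pick_batch_norm_activation(relu, relu_negative_slope=0.0):
    # Returns (act, needs_leaky_relu): 'relu' is fused into batch_norm only
    # for a plain ReLU; a non-zero negative slope means the activation is
    # applied afterwards as a separate leaky_relu layer.
    if not relu:
        return None, False
    if relu_negative_slope != 0.0:
        return None, True
    return 'relu', False

print(pick_batch_norm_activation(False))       # (None, False)
print(pick_batch_norm_activation(True))        # ('relu', False)
print(pick_batch_norm_activation(True, 0.1))   # (None, True)
```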
...@@ -112,6 +112,13 @@ class PaddleMapper(NodeMapper):
def map_relu(self, node):
return PaddleNode('relu')
def map_prelu(self, node):
channel_shared = getattr(node.parameters, 'channel_shared', False)
return PaddleNode('prelu', channel_shared)
def map_tanh(self, node):
return PaddleNode('tanh')
def map_pooling(self, node):
pool_type = node.parameters.pool
if pool_type == 0:
......
...@@ -160,5 +160,5 @@ def val(file_list=TEST_LIST):
return _reader_creator(file_list, 'val', shuffle=False)
def test(file_list): def test(file_list=TEST_LIST):
return _reader_creator(file_list, 'test', shuffle=False)
...@@ -157,7 +157,8 @@ def train(args):
test_reader = paddle.batch(reader.val(), batch_size=test_batch_size)
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name) train_exe = fluid.ParallelExecutor(
use_cuda=True if args.use_gpu else False, loss_name=avg_cost.name)
fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name]
......
#!/bin/bash
# This file is only used for continuous evaluation.
rm -rf *_factor.txt
model_file='model.py'
python $model_file --batch_size 128 --pass_num 5 --device CPU | python _ce.py
# this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
# NOTE: kpi.py should be shared across models in some way!!!!
train_cost_kpi = CostKpi('train_cost', 0.02, actived=True)
test_acc_kpi = AccKpi('test_acc', 0.005, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.06, actived=True)
train_acc_kpi = AccKpi('train_acc', 0.005, actived=True)
tracking_kpis = [
train_acc_kpi,
train_cost_kpi,
test_acc_kpi,
train_duration_kpi,
]
def parse_log(log):
'''
This method should be implemented by model developers.
The suggestion:
each line in the log should be key, value, for example:
"
train_cost\t1.0
test_cost\t1.0
train_cost\t1.0
train_cost\t1.0
train_acc\t1.2
"
'''
for line in log.split('\n'):
fs = line.strip().split('\t')
print(fs)
if len(fs) == 3 and fs[0] == 'kpis':
kpi_name = fs[1]
kpi_value = float(fs[2])
yield kpi_name, kpi_value
def log_to_ce(log):
kpi_tracker = {}
for kpi in tracking_kpis:
kpi_tracker[kpi.name] = kpi
for (kpi_name, kpi_value) in parse_log(log):
print(kpi_name, kpi_value)
kpi_tracker[kpi_name].add_record(kpi_value)
kpi_tracker[kpi_name].persist()
if __name__ == '__main__':
log = sys.stdin.read()
log_to_ce(log)
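For illustration (the sample log content below is made up), `parse_log` only picks up tab-separated lines of the form `kpis\t<name>\t<value>`; everything else is ignored.

```python
sample_log = "\n".join([
    "pass=0, train_avg_acc=0.98, train_avg_loss=0.05",  # not a kpis line, skipped
    "kpis\ttrain_acc\t0.98",
    "kpis\ttrain_cost\t0.05",
])
# yields ('train_acc', 0.98) and then ('train_cost', 0.05)
for name, value in parse_log(sample_log):
    print(name, value)
```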
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import argparse
import cProfile  # needed by run_benchmark when --use_cprof is set
import time
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
SEED = 90
DTYPE = "float32"
# The random seed must be set before configuring the network.
fluid.default_startup_program().random_seed = SEED
def parse_args():
parser = argparse.ArgumentParser("mnist model benchmark.")
parser.add_argument(
'--batch_size', type=int, default=128, help='The minibatch size.')
parser.add_argument(
'--iterations', type=int, default=35, help='The number of minibatches.')
parser.add_argument(
'--pass_num', type=int, default=5, help='The number of passes.')
parser.add_argument(
'--device',
type=str,
default='GPU',
choices=['CPU', 'GPU'],
help='The device type.')
parser.add_argument(
'--infer_only', action='store_true', help='If set, run forward only.')
parser.add_argument(
'--use_cprof', action='store_true', help='If set, use cProfile.')
parser.add_argument(
'--use_nvprof',
action='store_true',
help='If set, use nvprof for CUDA.')
args = parser.parse_args()
return args
def print_arguments(args):
vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and
vars(args)['device'] == 'GPU')
print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
def cnn_model(data):
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=data,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
act="relu")
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
act="relu")
# TODO(dzhwinter): refine the initializer and random seed setting
SIZE = 10
input_shape = conv_pool_2.shape
param_shape = [reduce(lambda a, b: a * b, input_shape[1:], 1)] + [SIZE]
scale = (2.0 / (param_shape[0]**2 * SIZE))**0.5
predict = fluid.layers.fc(
input=conv_pool_2,
size=SIZE,
act="softmax",
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=scale)))
return predict
def eval_test(exe, batch_acc, batch_size_tensor, inference_program):
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=args.batch_size)
test_pass_acc = fluid.average.WeightedAverage()
for batch_id, data in enumerate(test_reader()):
img_data = np.array(map(lambda x: x[0].reshape([1, 28, 28]),
data)).astype(DTYPE)
y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape([len(y_data), 1])
acc, weight = exe.run(inference_program,
feed={"pixel": img_data,
"label": y_data},
fetch_list=[batch_acc, batch_size_tensor])
test_pass_acc.add(value=acc, weight=weight)
pass_acc = test_pass_acc.eval()
return pass_acc
def run_benchmark(model, args):
if args.use_cprof:
pr = cProfile.Profile()
pr.enable()
start_time = time.time()
# Input data
images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE)
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
# Train program
predict = model(images)
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)
# Evaluator
batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
batch_acc = fluid.layers.accuracy(
input=predict, label=label, total=batch_size_tensor)
# inference program
inference_program = fluid.default_main_program().clone()
with fluid.program_guard(inference_program):
inference_program = fluid.io.get_inference_program(
target_vars=[batch_acc, batch_size_tensor])
# Optimization
opt = fluid.optimizer.AdamOptimizer(
learning_rate=0.001, beta1=0.9, beta2=0.999)
opt.minimize(avg_cost)
fluid.memory_optimize(fluid.default_main_program())
# Initialize executor
place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
exe = fluid.Executor(place)
# Parameter initialization
exe.run(fluid.default_startup_program())
# Reader
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=args.batch_size)
accuracy = fluid.average.WeightedAverage()
for pass_id in range(args.pass_num):
accuracy.reset()
pass_start = time.time()
every_pass_loss = []
for batch_id, data in enumerate(train_reader()):
img_data = np.array(
map(lambda x: x[0].reshape([1, 28, 28]), data)).astype(DTYPE)
y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape([len(y_data), 1])
start = time.time()
loss, acc, weight = exe.run(
fluid.default_main_program(),
feed={"pixel": img_data,
"label": y_data},
fetch_list=[avg_cost, batch_acc, batch_size_tensor]
) # The accuracy is the accumulation of batches, but not the current batch.
end = time.time()
accuracy.add(value=acc, weight=weight)
every_pass_loss.append(loss)
print("Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
(pass_id, batch_id, loss, acc))
pass_end = time.time()
train_avg_acc = accuracy.eval()
train_avg_loss = np.mean(every_pass_loss)
test_avg_acc = eval_test(exe, batch_acc, batch_size_tensor,
inference_program)
print(
"pass=%d, train_avg_acc=%f,train_avg_loss=%f, test_avg_acc=%f, elapse=%f"
% (pass_id, train_avg_acc, train_avg_loss, test_avg_acc,
(pass_end - pass_start)))
#Note: The following logs are special for CE monitoring.
#Other situations do not need to care about these logs.
print("kpis train_acc %f" % train_avg_acc)
print("kpis train_cost %f" % train_avg_loss)
print("kpis test_acc %f" % test_avg_acc)
print("kpis train_duration %f" % (pass_end - pass_start))
if __name__ == '__main__':
args = parse_args()
print_arguments(args)
if args.use_nvprof and args.device == 'GPU':
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
run_benchmark(cnn_model, args)
else:
run_benchmark(cnn_model, args)
...@@ -9,13 +9,14 @@ ...@@ -9,13 +9,14 @@
```text ```text
. .
├── images # README 文档中的图片 ├── images # README 文档中的图片
├── optim.py # learning rate scheduling 计算程序 ├── config.py # 训练、预测以及模型参数配置
├── infer.py # 预测脚本 ├── infer.py # 预测脚本
├── model.py # 模型定义 ├── model.py # 模型定义
├── optim.py # learning rate scheduling 计算程序
├── reader.py # 数据读取接口 ├── reader.py # 数据读取接口
├── README.md # 文档 ├── README.md # 文档
├── train.py # 训练脚本 ├── train.py # 训练脚本
└── config.py # 训练、预测以及模型参数配置 └── util.py # wordpiece 数据解码工具
``` ```
### 简介 ### 简介
...@@ -58,34 +59,43 @@ Decoder 具有和 Encoder 类似的结构,只是相比于组成 Encoder 的 la ...@@ -58,34 +59,43 @@ Decoder 具有和 Encoder 类似的结构,只是相比于组成 Encoder 的 la
### 数据准备 ### 数据准备
我们以 [WMT'16 EN-DE 数据集](http://www.statmt.org/wmt16/translation-task.html)作为示例,同时参照论文中的设置使用 BPE(byte-pair encoding)[4]编码的数据,使用这种方式表示的数据能够更好的解决未登录词(out-of-vocabulary,OOV)的问题。用到的 BPE 数据可以参照[这里](https://github.com/google/seq2seq/blob/master/docs/data.md)进行下载,下载后解压,其中 `train.tok.clean.bpe.32000.en``train.tok.clean.bpe.32000.de` 为使用 BPE 的训练数据(平行语料,分别对应了英语和德语,经过了 tokenize 和 BPE 的处理),`newstest2013.tok.bpe.32000.en``newstest2013.tok.bpe.32000.de` 等为测试数据(`newstest2013.tok.en``newstest2013.tok.de` 等则为对应的未使用 BPE 的测试数据),`vocab.bpe.32000` 为相应的词典文件(源语言和目标语言共享该词典文件)。 WMT 数据集是机器翻译领域公认的主流数据集;WMT 英德和英法数据集也是 Transformer 论文中所用数据集,其中英德数据集使用了 BPE(byte-pair encoding)[4]编码的数据,英法数据集使用了 wordpiece [5]的数据。我们这里也将使用 WMT 英德和英法翻译数据,并和论文保持一致使用 BPE 和 wordpiece 的数据,下面给出了使用的方法。对于其他自定义数据,参照下文遵循或转换为类似的数据格式即可。
#### WMT 英德翻译数据
由于本示例中的数据读取脚本 `reader.py` 使用的样本数据的格式为 `\t` 分隔的的源语言和目标语言句子对(句子中的词之间使用空格分隔), 因此需要将源语言到目标语言的平行语料库文件合并为一个文件,可以执行以下命令进行合并: [WMT'16 EN-DE 数据集](http://www.statmt.org/wmt16/translation-task.html)是一个中等规模的数据集。参照论文,英德数据集我们使用 BPE 编码的数据,这能够更好的解决未登录词(out-of-vocabulary,OOV)的问题[4]。用到的 BPE 数据可以参照[这里](https://github.com/google/seq2seq/blob/master/docs/data.md)进行下载(如果希望在自定义数据中使用 BPE 编码,可以参照[这里](https://github.com/rsennrich/subword-nmt)进行预处理),下载后解压,其中 `train.tok.clean.bpe.32000.en``train.tok.clean.bpe.32000.de` 为使用 BPE 的训练数据(平行语料,分别对应了英语和德语,经过了 tokenize 和 BPE 的处理),`newstest2013.tok.bpe.32000.en``newstest2013.tok.bpe.32000.de` 等为测试数据(`newstest2013.tok.en``newstest2013.tok.de` 等则为对应的未使用 BPE 的测试数据),`vocab.bpe.32000` 为相应的词典文件(源语言和目标语言共享该词典文件)。
由于本示例中的数据读取脚本 `reader.py` 默认使用的样本数据的格式为 `\t` 分隔的的源语言和目标语言句子对(默认句子中的词之间使用空格分隔),因此需要将源语言到目标语言的平行语料库文件合并为一个文件,可以执行以下命令进行合并:
```sh ```sh
paste -d '\t' train.tok.clean.bpe.32000.en train.tok.clean.bpe.32000.de > train.tok.clean.bpe.32000.en-de paste -d '\t' train.tok.clean.bpe.32000.en train.tok.clean.bpe.32000.de > train.tok.clean.bpe.32000.en-de
``` ```
此外,下载的词典文件 `vocab.bpe.32000` 中未包含表示序列开始、序列结束和未登录词的特殊符号,可以使用如下命令在词典中加入 `<s>``<e>``<unk>` 作为这三个特殊符号。 此外,下载的词典文件 `vocab.bpe.32000` 中未包含表示序列开始、序列结束和未登录词的特殊符号,可以使用如下命令在词典中加入 `<s>``<e>``<unk>` 作为这三个特殊符号(用 BPE 表示数据已有效避免了未登录词的问题,这里加入只是做通用处理)
```sh ```sh
sed -i '1i\<s>\n<e>\n<unk>' vocab.bpe.32000 sed -i '1i\<s>\n<e>\n<unk>' vocab.bpe.32000
``` ```
对于其他自定义数据,遵循或转换为上述的数据格式即可。如果希望在自定义数据中使用 BPE 编码,可以参照[这里](https://github.com/rsennrich/subword-nmt)进行预处理。 #### WMT 英法翻译数据
[WMT'14 EN-FR 数据集](http://www.statmt.org/wmt14/translation-task.html)是一个较大规模的数据集。参照论文,英法数据我们使用 wordpiece 表示的数据,wordpiece 和 BPE 类似同为采用 sub-word units 来解决 OOV 问题的方法[5]。我们提供了已完成预处理的 wordpiece 数据的下载,可以从[这里](http://transformer-data.bj.bcebos.com/wmt14_enfr.tar)下载,其中 `train.wordpiece.en-fr` 为使用 wordpiece 的训练数据,`newstest2014.wordpiece.en-fr` 为测试数据(`newstest2014.tok.en``newstest2014.tok.fr` 为对应的未经 wordpiece 处理过的测试数据,使用[脚本](https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl)进行了 tokenize 的处理),`vocab.wordpiece.en-fr` 为相应的词典文件(源语言和目标语言共享该词典文件)。
提供的英法翻译数据无需进行额外的处理,可以直接使用;需要注意的是,这些用 wordpiece 表示的数据中句子内的 token 之间使用 `\x01` 而非空格进行分隔(因部分 token 内包含空格),这需要在训练时进行指定。
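As a small illustration (the sample line below is invented), this is how the field and token delimiters of the provided wordpiece data relate to the reader options:

```python
# In the provided wordpiece data, '\t' separates the source and target
# sentences (reader.py's field_delimiter) and '\x01' separates tokens inside
# each sentence, which is why training passes --token_delimiter '\x01'.
line = "Hello\x01world\tBonjour\x01le\x01monde"
src, trg = line.split("\t")
print(src.split("\x01"))   # ['Hello', 'world']
print(trg.split("\x01"))   # ['Bonjour', 'le', 'monde']
```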
### 模型训练 ### 模型训练
`train.py` 是模型训练脚本,可以执行以下命令进行模型训练: `train.py` 是模型训练脚本。以英德翻译数据为例,可以执行以下命令进行模型训练:
```sh ```sh
python -u train.py \ python -u train.py \
--src_vocab_fpath data/vocab.bpe.32000 \ --src_vocab_fpath data/vocab.bpe.32000 \
--trg_vocab_fpath data/vocab.bpe.32000 \ --trg_vocab_fpath data/vocab.bpe.32000 \
--special_token '<s>' '<e>' '<unk>' \ --special_token '<s>' '<e>' '<unk>' \
--train_file_pattern data/train.tok.clean.bpe.32000.en-de \ --train_file_pattern data/train.tok.clean.bpe.32000.en-de \
--token_delimiter ' ' \
--use_token_batch True \ --use_token_batch True \
--batch_size 3200 \ --batch_size 3200 \
--sort_type pool \ --sort_type pool \
--pool_size 200000 \ --pool_size 200000
``` ```
上述命令中设置了源语言词典文件路径(`src_vocab_fpath`)、目标语言词典文件路径(`trg_vocab_fpath`)、训练数据文件(`train_file_pattern`,支持通配符)等数据相关的参数和构造 batch 方式(`use_token_batch`数据按照 token 数目或者 sequence 数目组成 batch)等 reader 相关的参数。有关这些参数更详细的信息可以通过执行以下命令查看: 上述命令中设置了源语言词典文件路径(`src_vocab_fpath`)、目标语言词典文件路径(`trg_vocab_fpath`)、训练数据文件(`train_file_pattern`,支持通配符)等数据相关的参数和构造 batch 方式(`use_token_batch`定了数据按照 token 数目或者 sequence 数目组成 batch)等 reader 相关的参数。有关这些参数更详细的信息可以通过执行以下命令查看:
```sh ```sh
python train.py --help python train.py --help
``` ```
...@@ -98,19 +108,20 @@ python -u train.py \ ...@@ -98,19 +108,20 @@ python -u train.py \
--trg_vocab_fpath data/vocab.bpe.32000 \ --trg_vocab_fpath data/vocab.bpe.32000 \
--special_token '<s>' '<e>' '<unk>' \ --special_token '<s>' '<e>' '<unk>' \
--train_file_pattern data/train.tok.clean.bpe.32000.en-de \ --train_file_pattern data/train.tok.clean.bpe.32000.en-de \
--token_delimiter ' ' \
--use_token_batch True \ --use_token_batch True \
--batch_size 3200 \ --batch_size 3200 \
--sort_type pool \ --sort_type pool \
--pool_size 200000 \ --pool_size 200000 \
n_layer 8 \ n_layer 6 \
n_head 16 \ n_head 16 \
d_model 1024 \ d_model 1024 \
d_inner_hid 4096 \ d_inner_hid 4096 \
dropout 0.3 dropout 0.3
``` ```
有关这些参数更详细信息的还请参考 `config.py` 中的注释说明 有关这些参数更详细信息的请参考 `config.py` 中的注释说明。对于英法翻译数据,执行训练和英德翻译训练类似,修改命令中的词典和数据文件为英法数据相应文件的路径,另外要注意的是由于英法翻译数据 token 间不是使用空格进行分隔,需要修改 `token_delimiter` 参数的设置为 `--token_delimiter '\x01'`
训练时默认使用所有 GPU,可以通过 `CUDA_VISIBLE_DEVICES` 环境变量来设置使用的 GPU 数目。也可以只使用CPU训练(通过参数--divice CPU),训练速度相对较慢。在训练过程中,每个 epoch 结束后将保存模型到参数 `model_dir` 指定的目录,每个 iteration 将打印如下的日志到标准输出: 训练时默认使用所有 GPU,可以通过 `CUDA_VISIBLE_DEVICES` 环境变量来设置使用的 GPU 数目。也可以只使用 CPU 训练(通过参数 `--divice CPU` 设置),训练速度相对较慢。在训练过程中,每个 epoch 结束后将保存模型到参数 `model_dir` 指定的目录,每个 epoch 内也会每隔1000个 iteration 进行一次保存,每个 iteration 将打印如下的日志到标准输出:
```txt ```txt
epoch: 0, batch: 0, sum loss: 258793.343750, avg loss: 11.069005, ppl: 64151.644531 epoch: 0, batch: 0, sum loss: 258793.343750, avg loss: 11.069005, ppl: 64151.644531
epoch: 0, batch: 1, sum loss: 256140.718750, avg loss: 11.059616, ppl: 63552.148438 epoch: 0, batch: 1, sum loss: 256140.718750, avg loss: 11.059616, ppl: 63552.148438
...@@ -126,37 +137,45 @@ epoch: 0, batch: 9, sum loss: 245157.500000, avg loss: 10.966562, ppl: 57905.187 ...@@ -126,37 +137,45 @@ epoch: 0, batch: 9, sum loss: 245157.500000, avg loss: 10.966562, ppl: 57905.187
### 模型预测 ### 模型预测
`infer.py` 是模型预测脚本,模型训练完成后可以执行以下命令对指定文件中的文本进行翻译: `infer.py` 是模型预测脚本。以英德翻译数据为例,模型训练完成后可以执行以下命令对指定文件中的文本进行翻译:
```sh ```sh
python -u infer.py \ python -u infer.py \
--src_vocab_fpath data/vocab.bpe.32000 \ --src_vocab_fpath data/vocab.bpe.32000 \
--trg_vocab_fpath data/vocab.bpe.32000 \ --trg_vocab_fpath data/vocab.bpe.32000 \
--special_token '<s>' '<e>' '<unk>' \ --special_token '<s>' '<e>' '<unk>' \
--test_file_pattern data/newstest2013.tok.bpe.32000.en-de \ --test_file_pattern data/newstest2013.tok.bpe.32000.en-de \
--use_wordpiece False \
--token_delimiter ' ' \
--batch_size 4 \ --batch_size 4 \
model_path trained_models/pass_20.infer.model \ model_path trained_models/pass_20.infer.model \
beam_size 5 beam_size 5 \
max_out_len 256 max_out_len 256
``` ```
和模型训练时类似,预测时也需要设置数据和 reader 相关的参数,并可以执行 `python infer.py --help` 查看这些参数的说明(部分参数意义和训练时略有不同);同样可以在预测命令中设置模型超参数,但应与模型训练时的设置一致;此外相比于模型训练,预测时还有一些额外的参数,如需要设置 `model_path` 来给出模型所在目录,可以设置 `beam_size``max_out_len` 来指定 Beam Search 算法的搜索宽度和最大深度(翻译长度),这些参数也可以在 `config.py` 中的 `InferTaskConfig` 内查阅注释说明并进行更改设置。 和模型训练时类似,预测时也需要设置数据和 reader 相关的参数,并可以执行 `python infer.py --help` 查看这些参数的说明(部分参数意义和训练时略有不同);同样可以在预测命令中设置模型超参数,但应与模型训练时的设置一致;此外相比于模型训练,预测时还有一些额外的参数,如需要设置 `model_path` 来给出模型所在目录,可以设置 `beam_size``max_out_len` 来指定 Beam Search 算法的搜索宽度和最大深度(翻译长度),这些参数也可以在 `config.py` 中的 `InferTaskConfig` 内查阅注释说明并进行更改设置。
执行以上预测命令会打印翻译结果到标准输出,每行输出是对应行输入的得分最高的翻译。需要注意,对于使用 BPE 的数据,预测出的翻译结果也将是 BPE 表示的数据,要恢复成原始的数据(这里指 tokenize 后的数据)才能进行正确的评估,可以使用以下命令来恢复 `predict.txt` 内的翻译结果到 `predict.tok.txt` 中。 执行以上预测命令会打印翻译结果到标准输出,每行输出是对应行输入的得分最高的翻译。对于使用 BPE 的英德数据,预测出的翻译结果也将是 BPE 表示的数据,要还原成原始的数据(这里指 tokenize 后的数据)才能进行正确的评估,可以使用以下命令来恢复 `predict.txt` 内的翻译结果到 `predict.tok.txt` 中(无需再次 tokenize 处理):
```sh ```sh
sed 's/@@ //g' predict.txt > predict.tok.txt sed 's/@@ //g' predict.txt > predict.tok.txt
``` ```
接下来就可以使用参考翻译(这里使用的是 `newstest2013.tok.de`)对翻译结果进行 BLEU 指标的评估了。计算 BLEU 值的一个较为广泛使用的脚本可以从[这里](https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/generic/multi-bleu.perl)获取,获取后执行如下命令: 对于英法翻译的 wordpiece 数据,执行预测和英德翻译预测类似,修改命令中的词典和数据文件为英法数据相应文件的路径,另外需要注意修改 `token_delimiter` 参数的设置为 `--token_delimiter '\x01'`;同时要修改 `use_wordpiece` 参数的设置为 `--use_wordpiece True`,这会在预测时将翻译得到的 wordpiece 数据还原为原始数据输出。为了使用 tokenize 的数据进行评估,还需要对翻译结果进行 tokenize 的处理,[Moses](https://github.com/moses-smt/mosesdecoder) 提供了一系列机器翻译相关的脚本。执行 `git clone https://github.com/moses-smt/mosesdecoder.git` 克隆 mosesdecoder 仓库后,可以使用其中的 `tokenizer.perl` 脚本对 `predict.txt` 内的翻译结果进行 tokenize 处理并输出到 `predict.tok.txt` 中,如下:
```sh
perl mosesdecoder/scripts/tokenizer/tokenizer.perl -l fr < predict.txt > predict.tok.txt
```
接下来就可以使用参考翻译对翻译结果进行 BLEU 指标的评估了。计算 BLEU 值的脚本也在 Moses 中包含,以英德翻译 `newstest2013.tok.de` 数据为例,执行如下命令:
```sh ```sh
perl multi-bleu.perl data/newstest2013.tok.de < predict.tok.txt perl mosesdecoder/scripts/generic/multi-bleu.perl data/newstest2013.tok.de < predict.tok.txt
``` ```
可以看到类似如下的结果。 可以看到类似如下的结果。
``` ```
BLEU = 25.08, 58.3/31.5/19.6/12.6 (BP=0.966, ratio=0.967, hyp_len=61321, ref_len=63412) BLEU = 25.08, 58.3/31.5/19.6/12.6 (BP=0.966, ratio=0.967, hyp_len=61321, ref_len=63412)
``` ```
目前在未使用 model average 的情况下,使用默认配置单机八卡(同论文中 base model 的配置)进行训练,英德翻译在 `newstest2013` 上测试 BLEU 值为25.,在 `newstest2014` 上测试 BLEU 值为26.;英法翻译在 `newstest2014` 上测试 BLEU 值为36.。
### 分布式训练 ### 分布式训练
transformer 模型支持同步或者异步的分布式训练。分布式的配置主要两个方面: Transformer 模型支持同步或者异步的分布式训练。分布式的配置主要两个方面:
1 命令行配置 1 命令行配置
...@@ -234,3 +253,4 @@ export PADDLE_PORT=6177 ...@@ -234,3 +253,4 @@ export PADDLE_PORT=6177
2. He K, Zhang X, Ren S, et al. [Deep residual learning for image recognition](http://openaccess.thecvf.com/content_cvpr_2016/papers/He_Deep_Residual_Learning_CVPR_2016_paper.pdf)[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2016: 770-778. 2. He K, Zhang X, Ren S, et al. [Deep residual learning for image recognition](http://openaccess.thecvf.com/content_cvpr_2016/papers/He_Deep_Residual_Learning_CVPR_2016_paper.pdf)[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2016: 770-778.
3. Ba J L, Kiros J R, Hinton G E. [Layer normalization](https://arxiv.org/pdf/1607.06450.pdf)[J]. arXiv preprint arXiv:1607.06450, 2016. 3. Ba J L, Kiros J R, Hinton G E. [Layer normalization](https://arxiv.org/pdf/1607.06450.pdf)[J]. arXiv preprint arXiv:1607.06450, 2016.
4. Sennrich R, Haddow B, Birch A. [Neural machine translation of rare words with subword units](https://arxiv.org/pdf/1508.07909)[J]. arXiv preprint arXiv:1508.07909, 2015. 4. Sennrich R, Haddow B, Birch A. [Neural machine translation of rare words with subword units](https://arxiv.org/pdf/1508.07909)[J]. arXiv preprint arXiv:1508.07909, 2015.
5. Wu Y, Schuster M, Chen Z, et al. [Google's neural machine translation system: Bridging the gap between human and machine translation](https://arxiv.org/pdf/1609.08144.pdf)[J]. arXiv preprint arXiv:1609.08144, 2016.
import argparse import argparse
import ast
import numpy as np import numpy as np
from functools import partial
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -11,6 +13,7 @@ from model import fast_decode as fast_decoder ...@@ -11,6 +13,7 @@ from model import fast_decode as fast_decoder
from config import * from config import *
from train import pad_batch_data from train import pad_batch_data
import reader import reader
import util
def parse_args(): def parse_args():
...@@ -46,6 +49,22 @@ def parse_args(): ...@@ -46,6 +49,22 @@ def parse_args():
default=["<s>", "<e>", "<unk>"], default=["<s>", "<e>", "<unk>"],
nargs=3, nargs=3,
help="The <bos>, <eos> and <unk> tokens in the dictionary.") help="The <bos>, <eos> and <unk> tokens in the dictionary.")
parser.add_argument(
"--use_wordpiece",
type=ast.literal_eval,
default=False,
help="The flag indicating if the data in wordpiece. The EN-FR data "
"we provided is wordpiece data. For wordpiece data, converting ids to "
"original words is a little different and some special codes are "
"provided in util.py to do this.")
parser.add_argument(
"--token_delimiter",
type=partial(
str.decode, encoding="string-escape"),
default=" ",
help="The delimiter used to split tokens in source or target sentences. "
"For EN-DE BPE data we provided, use spaces as token delimiter.; "
"For EN-FR wordpiece data we provided, use '\x01' as token delimiter.")
parser.add_argument( parser.add_argument(
'opts', 'opts',
help='See config.py for all options', help='See config.py for all options',
...@@ -320,7 +339,7 @@ def post_process_seq(seq, ...@@ -320,7 +339,7 @@ def post_process_seq(seq,
seq) seq)
def py_infer(test_data, trg_idx2word): def py_infer(test_data, trg_idx2word, use_wordpiece):
""" """
Inference by beam search implented by python, while the calculations from Inference by beam search implented by python, while the calculations from
symbols to probilities execute by Fluid operators. symbols to probilities execute by Fluid operators.
...@@ -399,7 +418,10 @@ def py_infer(test_data, trg_idx2word): ...@@ -399,7 +418,10 @@ def py_infer(test_data, trg_idx2word):
seqs = map(post_process_seq, batch_seqs[i]) seqs = map(post_process_seq, batch_seqs[i])
scores = batch_scores[i] scores = batch_scores[i]
for seq in seqs: for seq in seqs:
print(" ".join([trg_idx2word[idx] for idx in seq])) if use_wordpiece:
print(util.subtoken_ids_to_str(seq, trg_idx2word))
else:
print(" ".join([trg_idx2word[idx] for idx in seq]))
def prepare_batch_input(insts, data_input_names, util_input_names, src_pad_idx, def prepare_batch_input(insts, data_input_names, util_input_names, src_pad_idx,
...@@ -465,7 +487,7 @@ def prepare_batch_input(insts, data_input_names, util_input_names, src_pad_idx, ...@@ -465,7 +487,7 @@ def prepare_batch_input(insts, data_input_names, util_input_names, src_pad_idx,
return input_dict return input_dict
def fast_infer(test_data, trg_idx2word): def fast_infer(test_data, trg_idx2word, use_wordpiece):
""" """
Inference by beam search decoder based solely on Fluid operators. Inference by beam search decoder based solely on Fluid operators.
""" """
...@@ -520,7 +542,9 @@ def fast_infer(test_data, trg_idx2word): ...@@ -520,7 +542,9 @@ def fast_infer(test_data, trg_idx2word):
trg_idx2word[idx] trg_idx2word[idx]
for idx in post_process_seq( for idx in post_process_seq(
np.array(seq_ids)[sub_start:sub_end]) np.array(seq_ids)[sub_start:sub_end])
])) ]) if not use_wordpiece else util.subtoken_ids_to_str(
post_process_seq(np.array(seq_ids)[sub_start:sub_end]),
trg_idx2word))
scores[i].append(np.array(seq_scores)[sub_end - 1]) scores[i].append(np.array(seq_scores)[sub_end - 1])
print hyps[i][-1] print hyps[i][-1]
if len(hyps[i]) >= InferTaskConfig.n_best: if len(hyps[i]) >= InferTaskConfig.n_best:
...@@ -533,8 +557,9 @@ def infer(args, inferencer=fast_infer): ...@@ -533,8 +557,9 @@ def infer(args, inferencer=fast_infer):
src_vocab_fpath=args.src_vocab_fpath, src_vocab_fpath=args.src_vocab_fpath,
trg_vocab_fpath=args.trg_vocab_fpath, trg_vocab_fpath=args.trg_vocab_fpath,
fpattern=args.test_file_pattern, fpattern=args.test_file_pattern,
batch_size=args.batch_size, token_delimiter=args.token_delimiter,
use_token_batch=False, use_token_batch=False,
batch_size=args.batch_size,
pool_size=args.pool_size, pool_size=args.pool_size,
sort_type=reader.SortType.NONE, sort_type=reader.SortType.NONE,
shuffle=False, shuffle=False,
...@@ -547,7 +572,7 @@ def infer(args, inferencer=fast_infer): ...@@ -547,7 +572,7 @@ def infer(args, inferencer=fast_infer):
clip_last_batch=False) clip_last_batch=False)
trg_idx2word = test_data.load_dict( trg_idx2word = test_data.load_dict(
dict_path=args.trg_vocab_fpath, reverse=True) dict_path=args.trg_vocab_fpath, reverse=True)
inferencer(test_data, trg_idx2word) inferencer(test_data, trg_idx2word, args.use_wordpiece)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -12,15 +12,17 @@ class SortType(object): ...@@ -12,15 +12,17 @@ class SortType(object):
class Converter(object): class Converter(object):
def __init__(self, vocab, beg, end, unk): def __init__(self, vocab, beg, end, unk, delimiter):
self._vocab = vocab self._vocab = vocab
self._beg = beg self._beg = beg
self._end = end self._end = end
self._unk = unk self._unk = unk
self._delimiter = delimiter
def __call__(self, sentence): def __call__(self, sentence):
return [self._beg] + [ return [self._beg] + [
self._vocab.get(w, self._unk) for w in sentence.split() self._vocab.get(w, self._unk)
for w in sentence.split(self._delimiter)
] + [self._end] ] + [self._end]
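A toy illustration (made-up vocabulary, not repository code) of how the Converter now applies the configurable token delimiter:

```python
# The delimiter argument replaces the former unconditional whitespace split.
vocab = {"<s>": 0, "<e>": 1, "<unk>": 2, "Hello": 3, "world": 4}
conv = Converter(
    vocab, beg=vocab["<s>"], end=vocab["<e>"], unk=vocab["<unk>"],
    delimiter="\x01")
print(conv("Hello\x01world"))   # [0, 3, 4, 1]
```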
...@@ -146,9 +148,12 @@ class DataReader(object): ...@@ -146,9 +148,12 @@ class DataReader(object):
:param use_token_batch: Whether to produce batch data according to :param use_token_batch: Whether to produce batch data according to
token number. token number.
:type use_token_batch: bool :type use_token_batch: bool
:param delimiter: The delimiter used to split source and target in each :param field_delimiter: The delimiter used to split source and target in
line of data file. each line of data file.
:type delimiter: basestring :type field_delimiter: basestring
:param token_delimiter: The delimiter used to split tokens in source or
target sentences.
:type token_delimiter: basestring
:param start_mark: The token representing for the beginning of :param start_mark: The token representing for the beginning of
sentences in dictionary. sentences in dictionary.
:type start_mark: basestring :type start_mark: basestring
...@@ -175,12 +180,12 @@ class DataReader(object): ...@@ -175,12 +180,12 @@ class DataReader(object):
shuffle=True, shuffle=True,
shuffle_batch=False, shuffle_batch=False,
use_token_batch=False, use_token_batch=False,
delimiter="\t", field_delimiter="\t",
token_delimiter=" ",
start_mark="<s>", start_mark="<s>",
end_mark="<e>", end_mark="<e>",
unk_mark="<unk>", unk_mark="<unk>",
seed=0, seed=0):
pkl_filename=None):
self._src_vocab = self.load_dict(src_vocab_fpath) self._src_vocab = self.load_dict(src_vocab_fpath)
self._only_src = True self._only_src = True
if trg_vocab_fpath is not None: if trg_vocab_fpath is not None:
...@@ -195,24 +200,11 @@ class DataReader(object): ...@@ -195,24 +200,11 @@ class DataReader(object):
self._shuffle_batch = shuffle_batch self._shuffle_batch = shuffle_batch
self._min_length = min_length self._min_length = min_length
self._max_length = max_length self._max_length = max_length
self._delimiter = delimiter self._field_delimiter = field_delimiter
self._token_delimiter = token_delimiter
if pkl_filename is None: self._epoch_batches = []
self.load_src_trg_ids(end_mark, fpattern, start_mark, tar_fname, self.load_src_trg_ids(end_mark, fpattern, start_mark, tar_fname,
unk_mark) unk_mark)
else:
try:
with open(pkl_filename, 'r') as f:
self._src_seq_ids, self._trg_seq_ids, self._sample_infos = cPickle.load(
f)
except:
self.load_src_trg_ids(end_mark, fpattern, start_mark, tarfile,
unk_mark)
with open(pkl_filename, 'w') as f:
cPickle.dump((self._src_seq_ids, self._trg_seq_ids,
self._sample_infos), f,
cPickle.HIGHEST_PROTOCOL)
self._random = random.Random(x=seed) self._random = random.Random(x=seed)
def load_src_trg_ids(self, end_mark, fpattern, start_mark, tar_fname, def load_src_trg_ids(self, end_mark, fpattern, start_mark, tar_fname,
...@@ -222,7 +214,8 @@ class DataReader(object): ...@@ -222,7 +214,8 @@ class DataReader(object):
vocab=self._src_vocab, vocab=self._src_vocab,
beg=self._src_vocab[start_mark], beg=self._src_vocab[start_mark],
end=self._src_vocab[end_mark], end=self._src_vocab[end_mark],
unk=self._src_vocab[unk_mark]) unk=self._src_vocab[unk_mark],
delimiter=self._token_delimiter)
] ]
if not self._only_src: if not self._only_src:
converters.append( converters.append(
...@@ -230,7 +223,8 @@ class DataReader(object): ...@@ -230,7 +223,8 @@ class DataReader(object):
vocab=self._trg_vocab, vocab=self._trg_vocab,
beg=self._trg_vocab[start_mark], beg=self._trg_vocab[start_mark],
end=self._trg_vocab[end_mark], end=self._trg_vocab[end_mark],
unk=self._trg_vocab[unk_mark])) unk=self._trg_vocab[unk_mark],
delimiter=self._token_delimiter))
converters = ComposedConverter(converters) converters = ComposedConverter(converters)
...@@ -256,7 +250,7 @@ class DataReader(object): ...@@ -256,7 +250,7 @@ class DataReader(object):
f = tarfile.open(fpaths[0], 'r') f = tarfile.open(fpaths[0], 'r')
for line in f.extractfile(tar_fname): for line in f.extractfile(tar_fname):
yield line.split(self._delimiter) yield line.split(self._field_delimiter)
else: else:
for fpath in fpaths: for fpath in fpaths:
if not os.path.isfile(fpath): if not os.path.isfile(fpath):
...@@ -264,7 +258,7 @@ class DataReader(object): ...@@ -264,7 +258,7 @@ class DataReader(object):
with open(fpath, 'r') as f: with open(fpath, 'r') as f:
for line in f: for line in f:
yield line.split(self._delimiter) yield line.split(self._field_delimiter)
@staticmethod @staticmethod
def load_dict(dict_path, reverse=False): def load_dict(dict_path, reverse=False):
...@@ -272,9 +266,9 @@ class DataReader(object): ...@@ -272,9 +266,9 @@ class DataReader(object):
with open(dict_path, "r") as fdict: with open(dict_path, "r") as fdict:
for idx, line in enumerate(fdict): for idx, line in enumerate(fdict):
if reverse: if reverse:
word_dict[idx] = line.strip() word_dict[idx] = line.strip('\n')
else: else:
word_dict[line.strip()] = idx word_dict[line.strip('\n')] = idx
return word_dict return word_dict
def batch_generator(self): def batch_generator(self):
......
...@@ -3,6 +3,7 @@ import ast ...@@ -3,6 +3,7 @@ import ast
import multiprocessing import multiprocessing
import os import os
import time import time
from functools import partial
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -75,6 +76,14 @@ def parse_args(): ...@@ -75,6 +76,14 @@ def parse_args():
default=["<s>", "<e>", "<unk>"], default=["<s>", "<e>", "<unk>"],
nargs=3, nargs=3,
help="The <bos>, <eos> and <unk> tokens in the dictionary.") help="The <bos>, <eos> and <unk> tokens in the dictionary.")
parser.add_argument(
"--token_delimiter",
type=partial(
str.decode, encoding="string-escape"),
default=" ",
help="The delimiter used to split tokens in source or target sentences. "
"For EN-DE BPE data we provided, use spaces as token delimiter. "
"For EN-FR wordpiece data we provided, use '\x01' as token delimiter.")
parser.add_argument( parser.add_argument(
'opts', 'opts',
help='See config.py for all options', help='See config.py for all options',
...@@ -272,6 +281,7 @@ def test_context(train_progm, avg_cost, train_exe, dev_count, data_input_names, ...@@ -272,6 +281,7 @@ def test_context(train_progm, avg_cost, train_exe, dev_count, data_input_names,
src_vocab_fpath=args.src_vocab_fpath, src_vocab_fpath=args.src_vocab_fpath,
trg_vocab_fpath=args.trg_vocab_fpath, trg_vocab_fpath=args.trg_vocab_fpath,
fpattern=args.val_file_pattern, fpattern=args.val_file_pattern,
token_delimiter=args.token_delimiter,
use_token_batch=args.use_token_batch, use_token_batch=args.use_token_batch,
batch_size=args.batch_size * (1 if args.use_token_batch else dev_count), batch_size=args.batch_size * (1 if args.use_token_batch else dev_count),
pool_size=args.pool_size, pool_size=args.pool_size,
...@@ -334,6 +344,7 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler, ...@@ -334,6 +344,7 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
src_vocab_fpath=args.src_vocab_fpath, src_vocab_fpath=args.src_vocab_fpath,
trg_vocab_fpath=args.trg_vocab_fpath, trg_vocab_fpath=args.trg_vocab_fpath,
fpattern=args.train_file_pattern, fpattern=args.train_file_pattern,
token_delimiter=args.token_delimiter,
use_token_batch=args.use_token_batch, use_token_batch=args.use_token_batch,
batch_size=args.batch_size * (1 if args.use_token_batch else dev_count), batch_size=args.batch_size * (1 if args.use_token_batch else dev_count),
pool_size=args.pool_size, pool_size=args.pool_size,
...@@ -376,6 +387,8 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler, ...@@ -376,6 +387,8 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
for batch_id, data in enumerate(train_data()): for batch_id, data in enumerate(train_data()):
feed_list = [] feed_list = []
total_num_token = 0 total_num_token = 0
if args.local:
lr_rate = lr_scheduler.update_learning_rate()
for place_id, data_buffer in enumerate( for place_id, data_buffer in enumerate(
split_data( split_data(
data, num_part=dev_count)): data, num_part=dev_count)):
...@@ -387,7 +400,6 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler, ...@@ -387,7 +400,6 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
feed_kv_pairs = data_input_dict.items() + util_input_dict.items( feed_kv_pairs = data_input_dict.items() + util_input_dict.items(
) )
if args.local: if args.local:
lr_rate = lr_scheduler.update_learning_rate()
feed_kv_pairs += { feed_kv_pairs += {
lr_scheduler.learning_rate.name: lr_rate lr_scheduler.learning_rate.name: lr_rate
}.items() }.items()
...@@ -411,6 +423,10 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler, ...@@ -411,6 +423,10 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
print("epoch: %d, batch: %d, sum loss: %f, avg loss: %f, ppl: %f" % print("epoch: %d, batch: %d, sum loss: %f, avg loss: %f, ppl: %f" %
(pass_id, batch_id, total_sum_cost, total_avg_cost, (pass_id, batch_id, total_sum_cost, total_avg_cost,
np.exp([min(total_avg_cost, 100)]))) np.exp([min(total_avg_cost, 100)])))
if batch_id > 0 and batch_id % 1000 == 0:
fluid.io.save_persistables(
exe,
os.path.join(TrainTaskConfig.ckpt_dir, "latest.checkpoint"))
init = True init = True
# Validate and save the model for inference. # Validate and save the model for inference.
print("epoch: %d, " % pass_id + print("epoch: %d, " % pass_id +
......
import sys
import re
import six
import unicodedata
# Regular expression for unescaping token strings.
# '\u' is converted to '_'
# '\\' is converted to '\'
# '\213;' is converted to unichr(213)
# Inverse of escaping.
_UNESCAPE_REGEX = re.compile(r"\\u|\\\\|\\([0-9]+);")
# This set contains all letter and number characters.
_ALPHANUMERIC_CHAR_SET = set(
six.unichr(i) for i in range(sys.maxunicode)
if (unicodedata.category(six.unichr(i)).startswith("L") or
unicodedata.category(six.unichr(i)).startswith("N")))
def unescape_token(escaped_token):
"""
Inverse of encoding escaping.
"""
def match(m):
if m.group(1) is None:
return u"_" if m.group(0) == u"\\u" else u"\\"
try:
return six.unichr(int(m.group(1)))
except (ValueError, OverflowError) as _:
return u"\u3013" # Unicode for undefined character.
trimmed = escaped_token[:-1] if escaped_token.endswith(
"_") else escaped_token
return _UNESCAPE_REGEX.sub(match, trimmed)
def subtoken_ids_to_str(subtoken_ids, vocabs):
"""
Convert a list of subtoken(word piece) ids to a native string.
Refer to SubwordTextEncoder in Tensor2Tensor.
"""
subtokens = [vocabs.get(subtoken_id, u"") for subtoken_id in subtoken_ids]
# Convert a list of subtokens to a list of tokens.
concatenated = "".join([
t if isinstance(t, unicode) else t.decode("utf-8") for t in subtokens
])
split = concatenated.split("_")
tokens = []
for t in split:
if t:
unescaped = unescape_token(t + "_")
if unescaped:
tokens.append(unescaped)
# Convert a list of tokens to a unicode string (by inserting spaces between
# word tokens).
token_is_alnum = [t[0] in _ALPHANUMERIC_CHAR_SET for t in tokens]
ret = []
for i, token in enumerate(tokens):
if i > 0 and token_is_alnum[i - 1] and token_is_alnum[i]:
ret.append(u" ")
ret.append(token)
seq = "".join(ret)
return seq.encode("utf-8")
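A minimal usage sketch of `subtoken_ids_to_str` (the toy vocabulary below is invented; real id-to-subtoken mappings come from `DataReader.load_dict(dict_path, reverse=True)`):

```python
# Toy reversed vocabulary: id -> subtoken, with '_' marking word boundaries.
toy_vocab = {0: u"Hello_", 1: u"world_", 2: u"!_"}
print(subtoken_ids_to_str([0, 1, 2], toy_vocab))   # Hello world!
```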
cp -r ./data/pascalvoc/. /home/.cache/paddle/dataset/pascalvoc
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
cudaid=${object_detection_cudaid:=0} # use 0-th card as default
export CUDA_VISIBLE_DEVICES=$cudaid
if [ ! -d "/root/.cache/paddle/dataset/pascalvoc" ];then
mkdir -p /root/.cache/paddle/dataset/pascalvoc
./data/pascalvoc/download.sh
bash ./.move.sh
fi
FLAGS_benchmark=true python train.py --batch_size=64 --num_passes=2 --for_model_ce=True --data_dir=/root/.cache/paddle/dataset/pascalvoc/
...@@ -32,6 +32,10 @@ add_arg('mean_value_B', float, 127.5, "Mean value for B channel which will ...@@ -32,6 +32,10 @@ add_arg('mean_value_B', float, 127.5, "Mean value for B channel which will
add_arg('mean_value_G', float, 127.5, "Mean value for G channel which will be subtracted.") #116.78 add_arg('mean_value_G', float, 127.5, "Mean value for G channel which will be subtracted.") #116.78
add_arg('mean_value_R', float, 127.5, "Mean value for R channel which will be subtracted.") #103.94 add_arg('mean_value_R', float, 127.5, "Mean value for R channel which will be subtracted.") #103.94
add_arg('is_toy', int, 0, "Toy for quick debug, 0 means using all data, while n means using only n sample.") add_arg('is_toy', int, 0, "Toy for quick debug, 0 means using all data, while n means using only n sample.")
add_arg('for_model_ce', bool, False, "Use CE to evaluate the model")
add_arg('data_dir', str, 'data/pascalvoc', "data directory")
add_arg('skip_batch_num', int, 5, "The number of warm-up minibatches to skip when timing.")
add_arg('iterations', int, 120, "The number of minibatches to run per pass when for_model_ce is set.")
#yapf: enable #yapf: enable
...@@ -148,13 +152,20 @@ def train(args, ...@@ -148,13 +152,20 @@ def train(args,
print("Pass {0}, test map {1}".format(pass_id, test_map)) print("Pass {0}, test map {1}".format(pass_id, test_map))
return best_map return best_map
train_num = 0
total_train_time = 0.0
for pass_id in range(num_passes): for pass_id in range(num_passes):
start_time = time.time() start_time = time.time()
prev_start_time = start_time prev_start_time = start_time
end_time = 0 # end_time = 0
every_pass_loss = []
iter = 0
pass_duration = 0.0
for batch_id, data in enumerate(train_reader()): for batch_id, data in enumerate(train_reader()):
prev_start_time = start_time prev_start_time = start_time
start_time = time.time() start_time = time.time()
if args.for_model_ce and iter == args.iterations:
break
if len(data) < (devices_num * 2): if len(data) < (devices_num * 2):
print("There are too few data to train on all devices.") print("There are too few data to train on all devices.")
continue continue
...@@ -165,11 +176,28 @@ def train(args, ...@@ -165,11 +176,28 @@ def train(args,
loss_v, = exe.run(fluid.default_main_program(), loss_v, = exe.run(fluid.default_main_program(),
feed=feeder.feed(data), feed=feeder.feed(data),
fetch_list=[loss]) fetch_list=[loss])
end_time = time.time() # end_time = time.time()
loss_v = np.mean(np.array(loss_v)) loss_v = np.mean(np.array(loss_v))
if batch_id % 20 == 0: if batch_id % 20 == 0:
print("Pass {0}, batch {1}, loss {2}, time {3}".format( print("Pass {0}, batch {1}, loss {2}, time {3}".format(
pass_id, batch_id, loss_v, start_time - prev_start_time)) pass_id, batch_id, loss_v, start_time - prev_start_time))
if args.for_model_ce and iter >= args.skip_batch_num or pass_id != 0:
batch_duration = time.time() - start_time
pass_duration += batch_duration
train_num += len(data)
every_pass_loss.append(loss_v)
iter += 1
total_train_time += pass_duration
if args.for_model_ce and pass_id == num_passes - 1:
examples_per_sec = train_num / total_train_time
cost = np.mean(every_pass_loss)
with open("train_speed_factor.txt", 'w') as f:
f.write('{:f}\n'.format(examples_per_sec))
with open("train_cost_factor.txt", 'a+') as f:
f.write('{:f}\n'.format(cost))
best_map = test(pass_id, best_map) best_map = test(pass_id, best_map)
if pass_id % 10 == 0 or pass_id == num_passes - 1: if pass_id % 10 == 0 or pass_id == num_passes - 1:
save_model(str(pass_id)) save_model(str(pass_id))
...@@ -180,11 +208,11 @@ if __name__ == '__main__': ...@@ -180,11 +208,11 @@ if __name__ == '__main__':
args = parser.parse_args() args = parser.parse_args()
print_arguments(args) print_arguments(args)
data_dir = 'data/pascalvoc' data_dir = args.data_dir
train_file_list = 'trainval.txt'
val_file_list = 'test.txt'
label_file = 'label_list' label_file = 'label_list'
model_save_dir = args.model_save_dir model_save_dir = args.model_save_dir
train_file_list = 'trainval.txt'
val_file_list = 'test.txt'
if 'coco' in args.dataset: if 'coco' in args.dataset:
data_dir = 'data/coco' data_dir = 'data/coco'
if '2014' in args.dataset: if '2014' in args.dataset:
......
...@@ -113,6 +113,10 @@ data/test_images/00003.jpg
```
env CUDA_VISIBLE_DEVICES=0 python ctc_train.py
```
使用默认数据在CPU上训练:
```
env OMP_NUM_THREADS=<num_of_physical_cores> python ctc_train.py --use_gpu False --parallel=False
```
使用默认数据在GPU多卡上训练:
......
...@@ -12,7 +12,8 @@ def conv_bn_pool(input, ...@@ -12,7 +12,8 @@ def conv_bn_pool(input,
bias=None, bias=None,
param_0=None, param_0=None,
is_test=False, is_test=False,
pooling=True): pooling=True,
use_cudnn=False):
tmp = input tmp = input
for i in xrange(group): for i in xrange(group):
tmp = fluid.layers.conv2d( tmp = fluid.layers.conv2d(
...@@ -22,7 +23,7 @@ def conv_bn_pool(input, ...@@ -22,7 +23,7 @@ def conv_bn_pool(input,
padding=1, padding=1,
param_attr=param if param_0 is None else param_0, param_attr=param if param_0 is None else param_0,
act=None, # LinearActivation act=None, # LinearActivation
use_cudnn=True) use_cudnn=use_cudnn)
tmp = fluid.layers.batch_norm( tmp = fluid.layers.batch_norm(
input=tmp, input=tmp,
act=act, act=act,
...@@ -35,13 +36,17 @@ def conv_bn_pool(input, ...@@ -35,13 +36,17 @@ def conv_bn_pool(input,
pool_size=2, pool_size=2,
pool_type='max', pool_type='max',
pool_stride=2, pool_stride=2,
use_cudnn=True, use_cudnn=use_cudnn,
ceil_mode=True) ceil_mode=True)
return tmp return tmp
def ocr_convs(input, regularizer=None, gradient_clip=None, is_test=False): def ocr_convs(input,
regularizer=None,
gradient_clip=None,
is_test=False,
use_cudnn=False):
b = fluid.ParamAttr( b = fluid.ParamAttr(
regularizer=regularizer, regularizer=regularizer,
gradient_clip=gradient_clip, gradient_clip=gradient_clip,
...@@ -56,12 +61,36 @@ def ocr_convs(input, regularizer=None, gradient_clip=None, is_test=False): ...@@ -56,12 +61,36 @@ def ocr_convs(input, regularizer=None, gradient_clip=None, is_test=False):
initializer=fluid.initializer.Normal(0.0, 0.01)) initializer=fluid.initializer.Normal(0.0, 0.01))
tmp = input tmp = input
tmp = conv_bn_pool( tmp = conv_bn_pool(
tmp, 2, [16, 16], param=w1, bias=b, param_0=w0, is_test=is_test) tmp,
2, [16, 16],
param=w1,
bias=b,
param_0=w0,
is_test=is_test,
use_cudnn=use_cudnn)
tmp = conv_bn_pool(tmp, 2, [32, 32], param=w1, bias=b, is_test=is_test)
tmp = conv_bn_pool(tmp, 2, [64, 64], param=w1, bias=b, is_test=is_test)
tmp = conv_bn_pool( tmp = conv_bn_pool(
tmp, 2, [128, 128], param=w1, bias=b, is_test=is_test, pooling=False) tmp,
2, [32, 32],
param=w1,
bias=b,
is_test=is_test,
use_cudnn=use_cudnn)
tmp = conv_bn_pool(
tmp,
2, [64, 64],
param=w1,
bias=b,
is_test=is_test,
use_cudnn=use_cudnn)
tmp = conv_bn_pool(
tmp,
2, [128, 128],
param=w1,
bias=b,
is_test=is_test,
pooling=False,
use_cudnn=use_cudnn)
return tmp return tmp
...@@ -70,12 +99,14 @@ def encoder_net(images, ...@@ -70,12 +99,14 @@ def encoder_net(images,
rnn_hidden_size=200, rnn_hidden_size=200,
regularizer=None, regularizer=None,
gradient_clip=None, gradient_clip=None,
is_test=False): is_test=False,
use_cudnn=False):
conv_features = ocr_convs( conv_features = ocr_convs(
images, images,
regularizer=regularizer, regularizer=regularizer,
gradient_clip=gradient_clip, gradient_clip=gradient_clip,
is_test=is_test) is_test=is_test,
use_cudnn=use_cudnn)
sliced_feature = fluid.layers.im2sequence( sliced_feature = fluid.layers.im2sequence(
input=conv_features, input=conv_features,
stride=[1, 1], stride=[1, 1],
...@@ -142,7 +173,11 @@ def ctc_train_net(images, label, args, num_classes): ...@@ -142,7 +173,11 @@ def ctc_train_net(images, label, args, num_classes):
learning_rate_decay = None learning_rate_decay = None
regularizer = fluid.regularizer.L2Decay(L2_RATE) regularizer = fluid.regularizer.L2Decay(L2_RATE)
fc_out = encoder_net(images, num_classes, regularizer=regularizer) fc_out = encoder_net(
images,
num_classes,
regularizer=regularizer,
use_cudnn=True if args.use_gpu else False)
cost = fluid.layers.warpctc( cost = fluid.layers.warpctc(
input=fc_out, label=label, blank=num_classes, norm_by_times=True) input=fc_out, label=label, blank=num_classes, norm_by_times=True)
sum_cost = fluid.layers.reduce_sum(cost) sum_cost = fluid.layers.reduce_sum(cost)
...@@ -166,19 +201,18 @@ def ctc_train_net(images, label, args, num_classes): ...@@ -166,19 +201,18 @@ def ctc_train_net(images, label, args, num_classes):
if args.average_window > 0: if args.average_window > 0:
model_average = fluid.optimizer.ModelAverage( model_average = fluid.optimizer.ModelAverage(
args.average_window, args.average_window,
params_grads,
min_average_window=args.min_average_window, min_average_window=args.min_average_window,
max_average_window=args.max_average_window) max_average_window=args.max_average_window)
return sum_cost, error_evaluator, inference_program, model_average return sum_cost, error_evaluator, inference_program, model_average
def ctc_infer(images, num_classes): def ctc_infer(images, num_classes, use_cudnn):
fc_out = encoder_net(images, num_classes, is_test=True) fc_out = encoder_net(images, num_classes, is_test=True, use_cudnn=use_cudnn)
return fluid.layers.ctc_greedy_decoder(input=fc_out, blank=num_classes) return fluid.layers.ctc_greedy_decoder(input=fc_out, blank=num_classes)
def ctc_eval(images, label, num_classes): def ctc_eval(images, label, num_classes, use_cudnn):
fc_out = encoder_net(images, num_classes, is_test=True) fc_out = encoder_net(images, num_classes, is_test=True, use_cudnn=use_cudnn)
decoded_out = fluid.layers.ctc_greedy_decoder( decoded_out = fluid.layers.ctc_greedy_decoder(
input=fc_out, blank=num_classes) input=fc_out, blank=num_classes)
......
...@@ -25,7 +25,7 @@ class DataGenerator(object): ...@@ -25,7 +25,7 @@ class DataGenerator(object):
def __init__(self): def __init__(self):
pass pass
def train_reader(self, img_root_dir, img_label_list, batchsize): def train_reader(self, img_root_dir, img_label_list, batchsize, cycle):
''' '''
Reader interface for training. Reader interface for training.
...@@ -35,6 +35,10 @@ class DataGenerator(object): ...@@ -35,6 +35,10 @@ class DataGenerator(object):
:param img_label_list: The path of the <image_name, label> file for training. :param img_label_list: The path of the <image_name, label> file for training.
:type img_label_list: str :type img_label_list: str
:param cycle: If the number of iterations is greater than dataset_size / batch_size,
the reader re-iterates over the dataset as many times as necessary.
:type cycle: bool
''' '''
img_label_lines = [] img_label_lines = []
...@@ -65,24 +69,29 @@ class DataGenerator(object): ...@@ -65,24 +69,29 @@ class DataGenerator(object):
def reader(): def reader():
sizes = len(img_label_lines) / batchsize sizes = len(img_label_lines) / batchsize
for i in range(sizes): if sizes == 0:
result = [] raise ValueError('Batch size is bigger than the dataset size.')
sz = [0, 0] while True:
for j in range(batchsize): for i in range(sizes):
line = img_label_lines[i * batchsize + j] result = []
# h, w, img_name, labels sz = [0, 0]
items = line.split(' ') for j in range(batchsize):
line = img_label_lines[i * batchsize + j]
label = [int(c) for c in items[-1].split(',')] # h, w, img_name, labels
img = Image.open(os.path.join(img_root_dir, items[ items = line.split(' ')
2])).convert('L') #zhuanhuidu
if j == 0: label = [int(c) for c in items[-1].split(',')]
sz = img.size img = Image.open(os.path.join(img_root_dir, items[
img = img.resize((sz[0], sz[1])) 2])).convert('L') #zhuanhuidu
img = np.array(img) - 127.5 if j == 0:
img = img[np.newaxis, ...] sz = img.size
result.append([img, label]) img = img.resize((sz[0], sz[1]))
yield result img = np.array(img) - 127.5
img = img[np.newaxis, ...]
result.append([img, label])
yield result
if not cycle:
break
return reader return reader
...@@ -111,7 +120,7 @@ class DataGenerator(object): ...@@ -111,7 +120,7 @@ class DataGenerator(object):
return reader return reader
def infer_reader(self, img_root_dir=None, img_label_list=None): def infer_reader(self, img_root_dir=None, img_label_list=None, cycle=False):
'''A reader interface for inference. '''A reader interface for inference.
:param img_root_dir: The root path of the images for training. :param img_root_dir: The root path of the images for training.
...@@ -122,11 +131,15 @@ class DataGenerator(object): ...@@ -122,11 +131,15 @@ class DataGenerator(object):
was None. If img_label_list was set to None, it will read image path was None. If img_label_list was set to None, it will read image path
from stdin. from stdin.
:type img_root_dir: str :type img_root_dir: str
:param cycle: If the number of iterations is greater than dataset_size /
batch_size, the reader re-iterates over the dataset as many times as necessary.
:type cycle: bool
''' '''
def reader(): def reader():
if img_label_list is not None: def yield_img_and_label(lines):
for line in open(img_label_list): for line in lines:
if img_root_dir is not None: if img_root_dir is not None:
# h, w, img_name, labels # h, w, img_name, labels
img_name = line.split(' ')[2] img_name = line.split(' ')[2]
...@@ -138,6 +151,16 @@ class DataGenerator(object): ...@@ -138,6 +151,16 @@ class DataGenerator(object):
img = img[np.newaxis, ...] img = img[np.newaxis, ...]
label = [int(c) for c in line.split(' ')[3].split(',')] label = [int(c) for c in line.split(' ')[3].split(',')]
yield img, label yield img, label
if img_label_list is not None:
lines = []
with open(img_label_list) as f:
lines = f.readlines()
for img, label in yield_img_and_label(lines):
yield img, label
while cycle:
for img, label in yield_img_and_label(lines):
yield img, label
else: else:
while True: while True:
img_path = raw_input("Please input the path of image: ") img_path = raw_input("Please input the path of image: ")
...@@ -161,14 +184,15 @@ def data_shape(): ...@@ -161,14 +184,15 @@ def data_shape():
return DATA_SHAPE return DATA_SHAPE
def train(batch_size, train_images_dir=None, train_list_file=None): def train(batch_size, train_images_dir=None, train_list_file=None, cycle=False):
generator = DataGenerator() generator = DataGenerator()
if train_images_dir is None: if train_images_dir is None:
data_dir = download_data() data_dir = download_data()
train_images_dir = path.join(data_dir, TRAIN_DATA_DIR_NAME) train_images_dir = path.join(data_dir, TRAIN_DATA_DIR_NAME)
if train_list_file is None: if train_list_file is None:
train_list_file = path.join(data_dir, TRAIN_LIST_FILE_NAME) train_list_file = path.join(data_dir, TRAIN_LIST_FILE_NAME)
return generator.train_reader(train_images_dir, train_list_file, batch_size) return generator.train_reader(train_images_dir, train_list_file, batch_size,
cycle)
def test(batch_size=1, test_images_dir=None, test_list_file=None): def test(batch_size=1, test_images_dir=None, test_list_file=None):
...@@ -182,10 +206,14 @@ def test(batch_size=1, test_images_dir=None, test_list_file=None): ...@@ -182,10 +206,14 @@ def test(batch_size=1, test_images_dir=None, test_list_file=None):
generator.test_reader(test_images_dir, test_list_file), batch_size) generator.test_reader(test_images_dir, test_list_file), batch_size)
def inference(infer_images_dir=None, infer_list_file=None): def inference(batch_size=1,
infer_images_dir=None,
infer_list_file=None,
cycle=False):
generator = DataGenerator() generator = DataGenerator()
return paddle.batch( return paddle.batch(
generator.infer_reader(infer_images_dir, infer_list_file), 1) generator.infer_reader(infer_images_dir, infer_list_file, cycle),
batch_size)
def download_data(): def download_data():
......
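For reference, a minimal usage sketch (not part of the patch) of the updated `ctc_reader` API above; the batch size is illustrative, and with no directory/list arguments the reader falls back to the default dataset, which is downloaded on demand:

```python
# Minimal sketch of consuming the updated ctc_reader API; assumes the module
# is importable and the default dataset can be downloaded.
import ctc_reader

# cycle=True makes the reader loop over the training list indefinitely,
# so training can run for an arbitrary number of iterations (total_step).
train_reader = ctc_reader.train(batch_size=32, cycle=True)

for batch in train_reader():
    # each batch is a list of [image, label] pairs
    images = [sample[0] for sample in batch]
    labels = [sample[1] for sample in batch]
    break  # a real training loop keeps iterating up to total_step
```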
"""Trainer for OCR CTC model.""" """Trainer for OCR CTC model."""
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
from crnn_ctc_model import ctc_train_net from crnn_ctc_model import ctc_train_net
import ctc_reader import ctc_reader
...@@ -14,7 +15,7 @@ parser = argparse.ArgumentParser(description=__doc__) ...@@ -14,7 +15,7 @@ parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser) add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable # yapf: disable
add_arg('batch_size', int, 32, "Minibatch size.") add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('total_step', int, 720000, "Number of training iterations.") add_arg('total_step', int, 720000, "The number of iterations. Zero or less means whole training set. More than 0 means the training set might be looped until # of iterations is reached.")
add_arg('log_period', int, 1000, "Log period.") add_arg('log_period', int, 1000, "Log period.")
add_arg('save_model_period', int, 15000, "Save model period. '-1' means never saving the model.") add_arg('save_model_period', int, 15000, "Save model period. '-1' means never saving the model.")
add_arg('eval_period', int, 15000, "Evaluate period. '-1' means never evaluating the model.") add_arg('eval_period', int, 15000, "Evaluate period. '-1' means never evaluating the model.")
...@@ -25,6 +26,9 @@ add_arg('min_average_window',int, 10000, "Min average window.") ...@@ -25,6 +26,9 @@ add_arg('min_average_window',int, 10000, "Min average window.")
add_arg('max_average_window',int, 12500, "Max average window. It is proposed to be set as the number of minibatch in a pass.") add_arg('max_average_window',int, 12500, "Max average window. It is proposed to be set as the number of minibatch in a pass.")
add_arg('average_window', float, 0.15, "Average window.") add_arg('average_window', float, 0.15, "Average window.")
add_arg('parallel', bool, False, "Whether use parallel training.") add_arg('parallel', bool, False, "Whether use parallel training.")
add_arg('profile', bool, False, "Whether to use profiling.")
add_arg('skip_batch_num', int, 0, "The number of first minibatches to skip as warm-up for better performance test.")
add_arg('skip_test', bool, False, "Whether to skip test phase.")
# yapf: enable # yapf: enable
...@@ -49,7 +53,8 @@ def train(args, data_reader=ctc_reader): ...@@ -49,7 +53,8 @@ def train(args, data_reader=ctc_reader):
train_reader = data_reader.train( train_reader = data_reader.train(
args.batch_size, args.batch_size,
train_images_dir=train_images, train_images_dir=train_images,
train_list_file=train_list) train_list_file=train_list,
cycle=args.total_step > 0)
test_reader = data_reader.test( test_reader = data_reader.test(
test_images_dir=test_images, test_list_file=test_list) test_images_dir=test_images, test_list_file=test_list)
...@@ -74,7 +79,7 @@ def train(args, data_reader=ctc_reader): ...@@ -74,7 +79,7 @@ def train(args, data_reader=ctc_reader):
error_evaluator.reset(exe) error_evaluator.reset(exe)
if args.parallel: if args.parallel:
train_exe = fluid.ParallelExecutor( train_exe = fluid.ParallelExecutor(
use_cuda=True, loss_name=sum_cost.name) use_cuda=True if args.use_gpu else False, loss_name=sum_cost.name)
fetch_vars = [sum_cost] + error_evaluator.metrics fetch_vars = [sum_cost] + error_evaluator.metrics
...@@ -85,8 +90,8 @@ def train(args, data_reader=ctc_reader): ...@@ -85,8 +90,8 @@ def train(args, data_reader=ctc_reader):
feed=get_feeder_data(data, place)) feed=get_feeder_data(data, place))
results = [np.array(result).sum() for result in results] results = [np.array(result).sum() for result in results]
else: else:
results = exe.run(feed=get_feeder_data(data, place), results = train_exe.run(feed=get_feeder_data(data, place),
fetch_list=fetch_vars) fetch_list=fetch_vars)
results = [result[0] for result in results] results = [result[0] for result in results]
return results return results
...@@ -105,17 +110,29 @@ def train(args, data_reader=ctc_reader): ...@@ -105,17 +110,29 @@ def train(args, data_reader=ctc_reader):
print "Saved model to: %s/%s." % (args.save_model_dir, filename) print "Saved model to: %s/%s." % (args.save_model_dir, filename)
iter_num = 0 iter_num = 0
while True: stop = False
while not stop:
total_loss = 0.0 total_loss = 0.0
total_seq_error = 0.0 total_seq_error = 0.0
batch_times = []
# train a pass # train a pass
for data in train_reader(): for data in train_reader():
iter_num += 1 if args.total_step > 0 and iter_num == args.total_step + args.skip_batch_num:
if iter_num > args.total_step: stop = True
return break
if iter_num < args.skip_batch_num:
print("Warm-up iteration")
if iter_num == args.skip_batch_num:
profiler.reset_profiler()
start = time.time()
results = train_one_batch(data) results = train_one_batch(data)
batch_time = time.time() - start
fps = args.batch_size / batch_time
batch_times.append(batch_time)
total_loss += results[0] total_loss += results[0]
total_seq_error += results[2] total_seq_error += results[2]
iter_num += 1
# training log # training log
if iter_num % args.log_period == 0: if iter_num % args.log_period == 0:
print "\nTime: %s; Iter[%d]; Avg Warp-CTC loss: %.3f; Avg seq err: %.3f" % ( print "\nTime: %s; Iter[%d]; Avg Warp-CTC loss: %.3f; Avg seq err: %.3f" % (
...@@ -127,7 +144,7 @@ def train(args, data_reader=ctc_reader): ...@@ -127,7 +144,7 @@ def train(args, data_reader=ctc_reader):
total_seq_error = 0.0 total_seq_error = 0.0
# evaluate # evaluate
if iter_num % args.eval_period == 0: if not args.skip_test and iter_num % args.eval_period == 0:
if model_average: if model_average:
with model_average.apply(exe): with model_average.apply(exe):
test(iter_num) test(iter_num)
...@@ -141,12 +158,35 @@ def train(args, data_reader=ctc_reader): ...@@ -141,12 +158,35 @@ def train(args, data_reader=ctc_reader):
save_model(args, exe, iter_num) save_model(args, exe, iter_num)
else: else:
save_model(args, exe, iter_num) save_model(args, exe, iter_num)
# Postprocess benchmark data
latencies = batch_times[args.skip_batch_num:]
latency_avg = np.average(latencies)
latency_pc99 = np.percentile(latencies, 99)
fpses = np.divide(args.batch_size, latencies)
fps_avg = np.average(fpses)
fps_pc99 = np.percentile(fpses, 1)
# Benchmark output
print('\nTotal examples (incl. warm-up): %d' %
(iter_num * args.batch_size))
print('average latency: %.5f s, 99pc latency: %.5f s' % (latency_avg,
latency_pc99))
print('average fps: %.5f, fps for 99pc latency: %.5f' % (fps_avg,
fps_pc99))
def main(): def main():
args = parser.parse_args() args = parser.parse_args()
print_arguments(args) print_arguments(args)
train(args, data_reader=ctc_reader) if args.profile:
if args.use_gpu:
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
train(args, data_reader=ctc_reader)
else:
with profiler.profiler("CPU", sorted_key='total') as cpuprof:
train(args, data_reader=ctc_reader)
else:
train(args, data_reader=ctc_reader)
if __name__ == "__main__": if __name__ == "__main__":
......
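The benchmarking logic added to the training loop above boils down to the following pattern, shown here in isolation (the function and sample values are illustrative, not part of the patch): the first `skip_batch_num` iterations count as warm-up and are excluded from the latency/FPS statistics.

```python
import numpy as np

def summarize_benchmark(batch_times, batch_size, skip_batch_num=0):
    # Drop warm-up iterations, then report average and 99th-percentile
    # latency plus the corresponding throughput, as the patched scripts do.
    latencies = batch_times[skip_batch_num:]
    fpses = np.divide(batch_size, latencies)
    return {
        'latency_avg': np.average(latencies),
        'latency_pc99': np.percentile(latencies, 99),
        'fps_avg': np.average(fpses),
        'fps_pc99': np.percentile(fpses, 1),  # fps at the 99th-pc latency
    }

# Example: timings collected around the train_one_batch()/exe.run() calls.
batch_times = [0.12, 0.11, 0.05, 0.05, 0.06]
print(summarize_benchmark(batch_times, batch_size=32, skip_batch_num=2))
```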
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
from crnn_ctc_model import ctc_infer from crnn_ctc_model import ctc_infer
import numpy as np import numpy as np
...@@ -7,6 +8,7 @@ import ctc_reader ...@@ -7,6 +8,7 @@ import ctc_reader
import argparse import argparse
import functools import functools
import os import os
import time
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser) add_arg = functools.partial(add_arguments, argparser=parser)
...@@ -16,6 +18,10 @@ add_arg('input_images_dir', str, None, "The directory of images.") ...@@ -16,6 +18,10 @@ add_arg('input_images_dir', str, None, "The directory of images.")
add_arg('input_images_list', str, None, "The list file of images.") add_arg('input_images_list', str, None, "The list file of images.")
add_arg('dict', str, None, "The dictionary. The result of inference will be index sequence if the dictionary was None.") add_arg('dict', str, None, "The dictionary. The result of inference will be index sequence if the dictionary was None.")
add_arg('use_gpu', bool, True, "Whether use GPU to infer.") add_arg('use_gpu', bool, True, "Whether use GPU to infer.")
add_arg('iterations', int, 0, "The number of iterations. Zero or less means whole test set. More than 0 means the test set might be looped until # of iterations is reached.")
add_arg('profile', bool, False, "Whether to use profiling.")
add_arg('skip_batch_num', int, 0, "The number of first minibatches to skip as warm-up for better performance test.")
add_arg('batch_size', int, 1, "The minibatch size.")
# yapf: enable # yapf: enable
...@@ -25,11 +31,14 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader): ...@@ -25,11 +31,14 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader):
data_shape = data_reader.data_shape() data_shape = data_reader.data_shape()
# define network # define network
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
sequence = infer(images, num_classes) sequence = infer(
images, num_classes, use_cudnn=True if args.use_gpu else False)
# data reader # data reader
infer_reader = data_reader.inference( infer_reader = data_reader.inference(
batch_size=args.batch_size,
infer_images_dir=args.input_images_dir, infer_images_dir=args.input_images_dir,
infer_list_file=args.input_images_list) infer_list_file=args.input_images_list,
cycle=True if args.iterations > 0 else False)
# prepare environment # prepare environment
place = fluid.CPUPlace() place = fluid.CPUPlace()
if args.use_gpu: if args.use_gpu:
...@@ -56,23 +65,67 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader): ...@@ -56,23 +65,67 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader):
fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name) fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
print "Init model from: %s." % args.model_path print "Init model from: %s." % args.model_path
batch_times = []
iters = 0
for data in infer_reader(): for data in infer_reader():
if args.iterations > 0 and iters == args.iterations + args.skip_batch_num:
break
if iters < args.skip_batch_num:
print("Warm-up itaration")
if iters == args.skip_batch_num:
profiler.reset_profiler()
start = time.time()
result = exe.run(fluid.default_main_program(), result = exe.run(fluid.default_main_program(),
feed=get_feeder_data( feed=get_feeder_data(
data, place, need_label=False), data, place, need_label=False),
fetch_list=[sequence], fetch_list=[sequence],
return_numpy=False) return_numpy=False)
batch_time = time.time() - start
fps = args.batch_size / batch_time
batch_times.append(batch_time)
indexes = np.array(result[0]).flatten() indexes = np.array(result[0]).flatten()
if dict_map is not None: if dict_map is not None:
print "result: %s" % ([dict_map[index] for index in indexes], ) print "Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
iters,
batch_time,
fps,
[dict_map[index] for index in indexes], )
else: else:
print "result: %s" % (indexes, ) print "Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
iters,
batch_time,
fps,
indexes, )
iters += 1
latencies = batch_times[args.skip_batch_num:]
latency_avg = np.average(latencies)
latency_pc99 = np.percentile(latencies, 99)
fpses = np.divide(args.batch_size, latencies)
fps_avg = np.average(fpses)
fps_pc99 = np.percentile(fpses, 1)
# Benchmark output
print('\nTotal examples (incl. warm-up): %d' % (iters * args.batch_size))
print('average latency: %.5f s, 99pc latency: %.5f s' % (latency_avg,
latency_pc99))
print('average fps: %.5f, fps for 99pc latency: %.5f' % (fps_avg, fps_pc99))
def main(): def main():
args = parser.parse_args() args = parser.parse_args()
print_arguments(args) print_arguments(args)
inference(args, data_reader=ctc_reader) if args.profile:
if args.use_gpu:
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
inference(args, data_reader=ctc_reader)
else:
with profiler.profiler("CPU", sorted_key='total') as cpuprof:
inference(args, data_reader=ctc_reader)
else:
inference(args, data_reader=ctc_reader)
if __name__ == "__main__": if __name__ == "__main__":
......
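Both `ctc_train.py` and `infer.py` wrap their entry point in the same profiling logic; a sketch of that pattern, extracted for reference (`run` stands for the train or inference call and is illustrative):

```python
import paddle.fluid.profiler as profiler

def run_with_profiling(run, use_gpu):
    # GPU runs write a CSV trace via the CUDA profiler; CPU runs print a
    # summary sorted by total time, mirroring main() in both scripts above.
    if use_gpu:
        with profiler.cuda_profiler("cuda_profiler.txt", 'csv'):
            run()
    else:
        with profiler.profiler("CPU", sorted_key='total'):
            run()
```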
## Introduction
The scripts enclosed in this folder serve as examples of commands that start training
and inference of the model, and they can be further customised.
## Running with MKL-DNN
In order to run training or inference with the MKL-DNN library, set the
`FLAGS_use_mkldnn=1` environment variable.
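For instance, the flag can be set inline for a single run (the command below is illustrative; the bundled `train.sh` later in this folder does the equivalent via `export FLAGS_use_mkldnn=1`):
```sh
FLAGS_use_mkldnn=1 python ../ctc_train.py --use_gpu False --parallel False --batch_size 32
```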
## Prerequisites
No special requirements need to be met in order to run the training and inference.
## Training
To run training on *CPU*, please execute:
```sh
source train.sh CPU
```
To run training on *CPU* with MKL-DNN, please execute:
```sh
source train.sh MKLDNN
```
To run training on *GPU*, please execute:
```sh
source train.sh GPU
```
## Inference
To perform inference on the trained model using *CPU*, please run:
```sh
source infer.sh CPU
```
To perform inference on the trained model using *CPU* with MKL-DNN, please run:
```sh
source infer.sh MKLDNN
```
To perform inference on the trained model using *GPU*, please run:
```sh
source infer.sh GPU
```
#!/bin/bash
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
mode=$1 # gpu, cpu, mkldnn
if [ "$mode" = "CPU" ]; then
use_gpu="False"
model_path="cpu_model"
elif [ "$mode" = "GPU" ]; then
use_gpu="True"
model_path="gpu_model"
elif [ "$mode" = "MKLDNN" ]; then
use_gpu="False"
model_path="mkldnn_model"
export FLAGS_use_mkldnn=1
else
echo "Invalid mode provided. Please use one of {GPU, CPU, MKLDNN}"
exit 1
fi
ht=`lscpu |grep "per core"|awk -F':' '{print $2}'|xargs`
if [ $ht -eq 1 ]; then # HT is OFF
if [ -z "$KMP_AFFINITY" ]; then
export KMP_AFFINITY="granularity=fine,compact,0,0"
fi
if [ -z "$OMP_DYNAMIC" ]; then
export OMP_DYNAMIC="FALSE"
fi
else # HT is ON
if [ -z "$KMP_AFFINITY" ]; then
export KMP_AFFINITY="granularity=fine,compact,1,0"
fi
fi
python ../infer.py \
--model_path $model_path/model_00001 \
--input_images_list ~/.cache/paddle/dataset/ctc_data/data/test.list \
--input_images_dir ~/.cache/paddle/dataset/ctc_data/data/test_images \
--use_gpu $use_gpu \
--batch_size 32 \
--iterations 5 \
--skip_batch_num 2
#!/bin/bash
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
batch_size=32
core_num=`lscpu |grep -m1 "CPU(s)"|awk -F':' '{print $2}'|xargs`
mode=$1 # gpu, cpu, mkldnn
if [ "$mode" = "CPU" ]; then
if [ $core_num -gt $batch_size ]; then
echo "Batch size should be greater or equal to the number of
available cores, when parallel mode is set to True."
fi
use_gpu="False"
save_model_dir="cpu_model"
parallel="True"
elif [ "$mode" = "GPU" ]; then
use_gpu="True"
save_model_dir="gpu_model"
parallel="True"
elif [ "$mode" = "MKLDNN" ]; then
if [ $core_num -gt $batch_size ]; then
echo "Batch size should be greater or equal to the number of
available cores, when parallel mode is set to True."
fi
use_gpu="False"
save_model_dir="mkldnn_model"
parallel="False"
export FLAGS_use_mkldnn=1
else
echo "Invalid mode provided. Please use one of {GPU, CPU, MKLDNN}"
exit 1
fi
ht=`lscpu |grep "per core"|awk -F':' '{print $2}'|xargs`
if [ $ht -eq 1 ]; then # HT is OFF
if [ -z "$KMP_AFFINITY" ]; then
export KMP_AFFINITY="granularity=fine,compact,0,0"
fi
if [ -z "$OMP_DYNAMIC" ]; then
export OMP_DYNAMIC="FALSE"
fi
else # HT is ON
if [ -z "$KMP_AFFINITY" ]; then
export KMP_AFFINITY="granularity=fine,compact,1,0"
fi
fi
python ../ctc_train.py \
--use_gpu $use_gpu \
--parallel $parallel \
--batch_size $batch_size \
--save_model_period 1 \
--total_step 1 \
--save_model_dir $save_model_dir