Commit be80e25e authored by Yibing Liu

Merge branch 'develop' of https://github.com/PaddlePaddle/models into dam_py3

......@@ -8,7 +8,7 @@ PaddlePaddle provides a rich set of computational units to enable users to adopt
- [fluid models](fluid): use PaddlePaddle's Fluid APIs. We especially recommend the Fluid models.
- [v2 models](v2): use PaddlePaddle's v2 APIs.
- [legacy models](legacy): use PaddlePaddle's v2 APIs.
## License
......
......@@ -2,7 +2,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.v2 as paddle
import paddle.fluid as fluid
......
......@@ -158,7 +158,8 @@ class DuelingDQNModel(object):
for i, var in enumerate(policy_vars):
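# assign copies each policy-network parameter into the matching target-network parameter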
sync_op = fluid.layers.assign(policy_vars[i], target_vars[i])
sync_ops.append(sync_op)
sync_program = sync_program.prune(sync_ops)
# The prune API is deprecated, please don't use it any more.
sync_program = sync_program._prune(sync_ops)
return sync_program
def act(self, state, train_or_test):
......
......@@ -9,7 +9,7 @@ import gym
from gym import spaces
from gym.envs.atari.atari_env import ACTION_MEANING
from ale_python_interface import ALEInterface
from atari_py import ALEInterface
__all__ = ['AtariPlayer']
......
"""
CNN on MNIST data using the Fluid API of PaddlePaddle
"""
import paddle.v2 as paddle
import paddle
import paddle.fluid as fluid
......
......@@ -8,7 +8,7 @@ sys.path.append("..")
import matplotlib.pyplot as plt
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle
from advbox.adversary import Adversary
from advbox.attacks.gradient_method import BIM
......
......@@ -8,7 +8,7 @@ sys.path.append("..")
import matplotlib.pyplot as plt
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle
from advbox.adversary import Adversary
from advbox.attacks.deepfool import DeepFoolAttack
......
......@@ -8,7 +8,7 @@ sys.path.append("..")
import matplotlib.pyplot as plt
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle
from advbox.adversary import Adversary
from advbox.attacks.gradient_method import FGSM
......
......@@ -7,7 +7,7 @@ sys.path.append("..")
import matplotlib.pyplot as plt
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle
from advbox.adversary import Adversary
from advbox.attacks.gradient_method import ILCM
......
......@@ -7,7 +7,7 @@ sys.path.append("..")
import matplotlib.pyplot as plt
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle
from advbox.adversary import Adversary
from advbox.attacks.saliency import JSMA
......
......@@ -7,7 +7,7 @@ sys.path.append("..")
import matplotlib.pyplot as plt
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle
from advbox.adversary import Adversary
from advbox.attacks.lbfgs import LBFGS
......
......@@ -9,7 +9,7 @@ sys.path.append("..")
import matplotlib.pyplot as plt
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle
from advbox.adversary import Adversary
from advbox.attacks.gradient_method import MIFGSM
......
......@@ -55,6 +55,12 @@ for more detailed explanation about the arguments, please run
python ../train_and_evaluate.py --help
```
By default, training runs on a single GPU; it can be switched to multi-GPU mode by resetting the visible devices in `train.sh`, e.g.,
```
export CUDA_VISIBLE_DEVICES=0,1,2,3
```
4) Run the test with:
```
......
export CUDA_VISIBLE_DEVICES=0,1,2,3
export CUDA_VISIBLE_DEVICES=0
python -u ../test_and_evaluate.py --use_cuda \
--ext_eval \
--data_path ./data/data.pkl \
--save_path ./eval_10000 \
--model_path models/step_10000 \
--batch_size 100 \
--save_path ./eval_3900 \
--model_path models/step_3900 \
--channel1_num 16 \
--batch_size 200 \
--vocab_size 172130 \
--emb_size 200 \
--_EOS_ 1
......
export CUDA_VISIBLE_DEVICES=0,1,2,3
export CUDA_VISIBLE_DEVICES=0
export FLAGS_eager_delete_tensor_gb=0.0
python -u ../train_and_evaluate.py --use_cuda \
--data_path ./data/data.pkl \
--ext_eval \
--word_emb_init ./data/word_embedding.pkl \
--save_path ./models \
--batch_size 100 \
--batch_size 256 \
--vocab_size 172130 \
--channel1_num 16 \
--emb_size 200 \
--_EOS_ 1
......@@ -6,18 +6,25 @@ import utils.layers as layers
class Net(object):
def __init__(self, max_turn_num, max_turn_len, vocab_size, emb_size,
stack_num):
stack_num, channel1_num, channel2_num):
self._max_turn_num = max_turn_num
self._max_turn_len = max_turn_len
self._vocab_size = vocab_size
self._emb_size = emb_size
self._stack_num = stack_num
self._channel1_num = channel1_num
self._channel2_num = channel2_num
self.word_emb_name = "shared_word_emb"
self.use_stack_op = True
self.use_mask_cache = True
self.use_sparse_embedding = True
def set_word_embedding(self, word_emb, place):
word_emb_param = fluid.global_scope().find_var(
self.word_emb_name).get_tensor()
word_emb_param.set(word_emb, place)
def create_network(self):
mask_cache = dict() if self.use_mask_cache else None
......@@ -136,7 +143,7 @@ class Net(object):
t_a_r = fluid.layers.concat(input=t_a_r_stack, axis=1)
r_a_t = fluid.layers.concat(input=r_a_t_stack, axis=1)
# sim shape: [batch_size, 2*(stack_num+2), max_turn_len, max_turn_len]
# sim shape: [batch_size, 2*(stack_num+1), max_turn_len, max_turn_len]
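# stack_num attention layers plus the original embedding give stack_num+1 representations per side, so t_a_r and r_a_t together contribute 2*(stack_num+1) channels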
sim = fluid.layers.matmul(
x=t_a_r, y=r_a_t, transpose_y=True, alpha=1 / np.sqrt(200.0))
sim_turns.append(sim)
......@@ -147,10 +154,9 @@ class Net(object):
for index in six.moves.xrange(len(sim_turns)):
sim_turns[index] = fluid.layers.unsqueeze(
input=sim_turns[index], axes=[2])
# sim shape: [batch_size, 2*(stack_num+2), max_turn_num, max_turn_len, max_turn_len]
# sim shape: [batch_size, 2*(stack_num+1), max_turn_num, max_turn_len, max_turn_len]
sim = fluid.layers.concat(input=sim_turns, axis=2)
# for douban
final_info = layers.cnn_3d(sim, 32, 16)
final_info = layers.cnn_3d(sim, self._channel1_num, self._channel2_num)
loss, logits = layers.loss(final_info, label)
return loss, logits
......@@ -89,6 +89,16 @@ def parse_args():
type=int,
default=5,
help='The number of stacked attentive modules in network.')
parser.add_argument(
'--channel1_num',
type=int,
default=32,
help="The channels' number of the 1st conv3d layer's output.")
parser.add_argument(
'--channel2_num',
type=int,
default=16,
help="The channels' number of the 2nd conv3d layer's output.")
args = parser.parse_args()
return args
......@@ -110,7 +120,8 @@ def test(args):
}
dam = Net(args.max_turn_num, args.max_turn_len, args.vocab_size,
args.emb_size, args.stack_num)
args.emb_size, args.stack_num, args.channel1_num,
args.channel2_num)
loss, logits = dam.create_network()
loss.persistable = True
......
......@@ -88,6 +88,16 @@ def parse_args():
type=int,
default=5,
help='The number of stacked attentive modules in network.')
parser.add_argument(
'--channel1_num',
type=int,
default=32,
help="The channels' number of the 1st conv3d layer's output.")
parser.add_argument(
'--channel2_num',
type=int,
default=16,
help="The channels' number of the 2nd conv3d layer's output.")
args = parser.parse_args()
return args
......@@ -105,7 +115,8 @@ def train(args):
}
dam = Net(args.max_turn_num, args.max_turn_len, args.vocab_size,
args.emb_size, args.stack_num)
args.emb_size, args.stack_num, args.channel1_num,
args.channel2_num)
loss, logits = dam.create_network()
loss.persistable = True
......@@ -136,6 +147,9 @@ def train(args):
dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
print("device count %d" % dev_count)
print("theoretical memory usage: ")
print(fluid.contrib.memory_usage(
program=train_program, batch_size=args.batch_size))
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
......@@ -157,7 +171,8 @@ def train(args):
print("start loading word embedding init ...")
word_emb = np.array(pickle.load(open(args.word_emb_init, 'rb'))).astype(
'float32')
print("finish loading word embedding init ...")
dam.set_word_embedding(word_emb, place)
print("finish init word embedding ...")
print("start loading data ...")
train_data, val_data, test_data = pickle.load(open(args.data_path, 'rb'))
......@@ -171,8 +186,6 @@ def train(args):
print_step = max(1, batch_num / (dev_count * 100))
save_step = max(1, batch_num / (dev_count * 10))
word_emb_inited = False
print("begin model training ...")
print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
......@@ -187,12 +200,8 @@ def train(args):
for dev in xrange(dev_count):
index = it * dev_count + dev
feed_dict = reader.make_one_batch_input(train_batches, index)
if word_emb_inited is False and args.word_emb_init is not None:
feed_dict[dam.word_emb_name] = word_emb
feed_list.append(feed_dict)
word_emb_inited = True
cost = train_exe.run(feed=feed_list, fetch_list=[loss.name])
ave_cost += np.array(cost[0]).mean()
......
export CUDA_VISIBLE_DEVICES=0,1,2,3
export CUDA_VISIBLE_DEVICES=0
python -u ../test_and_evaluate.py --use_cuda \
--data_path ./data/data.pkl \
--save_path ./ \
--model_path models/step_10000 \
--batch_size 100 \
--save_path ./step_3900 \
--model_path ./models/step_3900 \
--batch_size 200 \
--vocab_size 434512 \
--emb_size 200 \
--_EOS_ 28270
......
export CUDA_VISIBLE_DEVICES=0,1,2,3
export CUDA_VISIBLE_DEVICES=0
export FLAGS_eager_delete_tensor_gb=0.0
python -u ../train_and_evaluate.py --use_cuda \
--data_path ./data/data.pkl \
--word_emb_init ./data/word_embedding.pkl \
--save_path ./models \
--batch_size 100 \
--batch_size 256 \
--vocab_size 434512 \
--emb_size 200 \
--_EOS_ 28270
......
deeplabv3plus_xception65_initialize.params
deeplabv3plus.params
deeplabv3plus.tar.gz
Running the DeepLab sample code in this directory requires the latest PaddlePaddle develop version. If your installed PaddlePaddle is older than this, please follow the instructions in the [installation document](http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/pip_install_cn.html) to update it.
Running the DeepLab sample code in this directory requires PaddlePaddle Fluid v1.0.0 or later. If your installed PaddlePaddle is older than this, please follow the instructions in the installation document to update it. When running on GPU, the program also requires cuDNN v7.
## Code structure
......@@ -41,10 +41,12 @@ data/cityscape/
To train the model from scratch, download our initialization model:
```
wget http://paddlemodels.cdn.bcebos.com/deeplab/deeplabv3plus_xception65_initialize.tar.gz
tar -xf deeplabv3plus_xception65_initialize.tar.gz && rm deeplabv3plus_xception65_initialize.tar.gz
```
To fine-tune the final trained model or use it directly for prediction, download our final model:
```
wget http://paddlemodels.cdn.bcebos.com/deeplab/deeplabv3plus.tar.gz
tar -xf deeplabv3plus.tar.gz && rm deeplabv3plus.tar.gz
```
......@@ -70,11 +72,11 @@ python train.py --help
```
python ./train.py \
--batch_size=8 \
--parallel=true
--parallel=true \
--train_crop_size=769 \
--total_step=90000 \
--init_weights_path=$INIT_WEIGHTS_PATH \
--save_weights_path=$SAVE_WEIGHTS_PATH \
--init_weights_path=deeplabv3plus_xception65_initialize.params \
--save_weights_path=output \
--dataset_path=$DATASET_PATH
```
......@@ -82,11 +84,10 @@ python ./train.py \
Run the following command to evaluate on the `Cityscape` test dataset:
```
python ./eval.py \
--init_weights_path=$INIT_WEIGHTS_PATH \
--init_weights=deeplabv3plus.params \
--dataset_path=$DATASET_PATH
```
The model file must be specified via the `--model_path` option.
The evaluation metric reported by the test script is [mean IoU]().
The model file must be specified via the `--model_path` option. The evaluation metric reported by the test script is mean IoU.
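For reference, mean IoU averages the per-class intersection-over-union over the classes that actually appear. A minimal NumPy sketch of the metric (illustrative only, not the exact accumulation logic of `eval.py`):
```
import numpy as np

def mean_iou(pred, label, num_classes=19):
    # per-class IoU = intersection / union, averaged over the classes present
    ious = []
    for c in range(num_classes):
        inter = np.sum((pred == c) & (label == c))
        union = np.sum((pred == c) | (label == c))
        if union > 0:
            ious.append(float(inter) / union)
    return np.mean(ious)
```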
## Experimental results
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
os.environ['FLAGS_fraction_of_gpu_memory_to_use'] = '0.98'
......@@ -91,7 +94,7 @@ exe = fluid.Executor(place)
exe.run(sp)
if args.init_weights_path:
print "load from:", args.init_weights_path
print("load from:", args.init_weights_path)
load_model()
dataset = CityscapeDataset(args.dataset_path, 'val')
......@@ -118,7 +121,7 @@ for i, imgs, labels, names in batches:
mp = (wrong + right) != 0
miou2 = np.mean((right[mp] * 1.0 / (right[mp] + wrong[mp])))
if args.verbose:
print 'step: %s, mIoU: %s' % (i + 1, miou2)
print('step: %s, mIoU: %s' % (i + 1, miou2))
else:
print '\rstep: %s, mIoU: %s' % (i + 1, miou2),
print('\rstep: %s, mIoU: %s' % (i + 1, miou2))
sys.stdout.flush()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
......@@ -50,7 +53,7 @@ def append_op_result(result, name):
def conv(*args, **kargs):
kargs['param_attr'] = name_scope + 'weights'
if kargs.has_key('bias_attr') and kargs['bias_attr']:
if 'bias_attr' in kargs and kargs['bias_attr']:
kargs['bias_attr'] = name_scope + 'biases'
else:
kargs['bias_attr'] = False
......@@ -62,7 +65,7 @@ def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None):
N, C, H, W = input.shape
if C % G != 0:
print "group can not divide channle:", C, G
print("group can not divide channle:", C, G)
for d in range(10):
for t in [d, -d]:
if G + t <= 0: continue
......@@ -70,7 +73,7 @@ def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None):
G = G + t
break
if C % G == 0:
print "use group size:", G
print("use group size:", G)
break
assert C % G == 0
param_shape = (G, )
......@@ -139,7 +142,7 @@ def seq_conv(input, channel, stride, filter, dilation=1, act=None):
filter,
stride,
groups=input.shape[1],
padding=(filter / 2) * dilation,
padding=(filter // 2) * dilation,
dilation=dilation)
input = bn(input)
if act: input = act(input)
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
import os
import six
default_config = {
"shuffle": True,
......@@ -30,7 +35,7 @@ def slice_with_pad(a, s, value=0):
pr = 0
pads.append([pl, pr])
slices.append([l, r])
slices = map(lambda x: slice(x[0], x[1], 1), slices)
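# Under Python 3, map() returns a lazy iterator, so materialize the slices before using them to index the array.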
slices = list(map(lambda x: slice(x[0], x[1], 1), slices))
a = a[slices]
a = np.pad(a, pad_width=pads, mode='constant', constant_values=value)
return a
......@@ -38,11 +43,17 @@ def slice_with_pad(a, s, value=0):
class CityscapeDataset:
def __init__(self, dataset_dir, subset='train', config=default_config):
import commands
label_dirname = dataset_dir + 'gtFine/' + subset
label_files = commands.getoutput(
"find %s -type f | grep labelTrainIds | sort" %
label_dirname).splitlines()
label_dirname = os.path.join(dataset_dir, 'gtFine/' + subset)
if six.PY2:
import commands
label_files = commands.getoutput(
"find %s -type f | grep labelTrainIds | sort" %
label_dirname).splitlines()
else:
import subprocess
label_files = subprocess.getstatusoutput(
"find %s -type f | grep labelTrainIds | sort" %
label_dirname)[-1].splitlines()
self.label_files = label_files
self.label_dirname = label_dirname
self.index = 0
......@@ -50,7 +61,7 @@ class CityscapeDataset:
self.dataset_dir = dataset_dir
self.config = config
self.reset()
print "total number", len(label_files)
print("total number", len(label_files))
def reset(self, shuffle=False):
self.index = 0
......@@ -66,13 +77,14 @@ class CityscapeDataset:
shape = self.config["crop_size"]
while True:
ln = self.label_files[self.index]
img_name = self.dataset_dir + 'leftImg8bit/' + self.subset + ln[len(
self.label_dirname):]
img_name = os.path.join(
self.dataset_dir,
'leftImg8bit/' + self.subset + ln[len(self.label_dirname):])
img_name = img_name.replace('gtFine_labelTrainIds', 'leftImg8bit')
label = cv2.imread(ln)
img = cv2.imread(img_name)
if img is None:
print "load img failed:", img_name
print("load img failed:", img_name)
self.next_img()
else:
break
......@@ -128,5 +140,7 @@ class CityscapeDataset:
from prefetch_generator import BackgroundGenerator
batches = BackgroundGenerator(batches, 100)
except:
print "You can install 'prefetch_generator' for acceleration of data reading."
print(
"You can install 'prefetch_generator' for acceleration of data reading."
)
return batches
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
os.environ['FLAGS_fraction_of_gpu_memory_to_use'] = '0.98'
......@@ -126,13 +129,12 @@ exe = fluid.Executor(place)
exe.run(sp)
if args.init_weights_path:
print "load from:", args.init_weights_path
print("load from:", args.init_weights_path)
load_model()
dataset = CityscapeDataset(args.dataset_path, 'train')
if args.parallel:
print "Using ParallelExecutor."
exe_p = fluid.ParallelExecutor(
use_cuda=True, loss_name=loss_mean.name, main_program=tp)
......@@ -149,9 +151,9 @@ for i, imgs, labels, names in batches:
'label': labels},
fetch_list=[pred, loss_mean])
if i % 100 == 0:
print "Model is saved to", args.save_weights_path
print("Model is saved to", args.save_weights_path)
save_model()
print "step %s, loss: %s" % (i, np.mean(retv[1]))
print("step %s, loss: %s" % (i, np.mean(retv[1])))
print "Training done. Model is saved to", args.save_weights_path
print("Training done. Model is saved to", args.save_weights_path)
save_model()
......@@ -10,3 +10,4 @@ output*
pred
eval_tools
box*
PyramidBox_WiderFace*
......@@ -427,6 +427,7 @@ class PyramidBox(object):
overlap_threshold=0.35,
neg_overlap=0.35)
loss = fluid.layers.reduce_sum(loss)
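# mark the loss persistable so memory optimization will not reuse its buffer before it is fetched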
loss.persistable = True
return loss
def train(self):
......
......@@ -189,13 +189,13 @@ def train(args, config, train_params, train_file_list):
fetch_vars = [np.mean(np.array(v)) for v in fetch_vars]
if batch_id % 10 == 0:
if not args.use_pyramidbox:
print("Pass {0}, batch {1}, loss {2}, time {3}".format(
print("Pass {:d}, batch {:d}, loss {:.6f}, time {:.5f}".format(
pass_id, batch_id, fetch_vars[0],
start_time - prev_start_time))
else:
print("Pass {0}, batch {1}, face loss {2}, " \
"head loss {3}, " \
"time {4}".format(pass_id,
print("Pass {:d}, batch {:d}, face loss {:.6f}, " \
"head loss {:.6f}, " \
"time {:.5f}".format(pass_id,
batch_id, fetch_vars[0], fetch_vars[1],
start_time - prev_start_time))
if pass_id % 1 == 0 or pass_id == epoc_num - 1:
......
......@@ -82,9 +82,6 @@ def save_widerface_bboxes(image_path, bboxes_scores, output_dir):
image_name = image_path.split('/')[-1]
image_class = image_path.split('/')[-2]
image_name = image_name.encode('utf-8')
image_class = image_class.encode('utf-8')
odir = os.path.join(output_dir, image_class)
if not os.path.exists(odir):
os.makedirs(odir)
......
# Faster RCNN Object Detection
---
## Table of Contents
- [Installation](#installation)
- [Introduction](#introduction)
- [Data preparation](#data-preparation)
- [Training](#training)
- [Finetuning](#finetuning)
- [Evaluation](#evaluation)
- [Inference and Visualization](#inference-and-visualization)
- [Appendix](#appendix)
## Installation
Running the sample code in this directory requires PaddlePaddle Fluid v1.0.0 or later. If the PaddlePaddle on your device is lower than this version, please follow the instructions in the [installation document](http://www.paddlepaddle.org/documentation/docs/zh/0.15.0/beginners_guide/install/install_doc.html#paddlepaddle) to update it.
## Introduction
[Faster RCNN](https://arxiv.org/abs/1506.01497) is a typical two-stage detector. The overall network can be divided into four parts, as shown below:
<p align="center">
<img src="image/Faster_RCNN.jpg" height=400 width=400 hspace='10'/> <br />
Faster RCNN model
</p>
1. Base conv layer. As a CNN object detector, Faster RCNN first extracts feature maps using a base convolutional network; the feature maps are then shared by the RPN and the fc layers. This sample uses [ResNet-50](https://arxiv.org/abs/1512.03385) as the base conv layer.
2. Region Proposal Network (RPN). The RPN generates proposals for detection. It generates anchors from a set of sizes and ratios, classifies the anchors into foreground and background by softmax, and then refines them by box regression to obtain more precise proposals.
3. RoI pooling. This layer takes feature maps and proposals as input, maps the proposals onto the feature maps, and pools them to the same size. The outputs are sent to fc layers for classification and regression.
4. Detection layer. The output of RoI pooling is used to compute the class and location of each proposal in two fc layers.
## Data preparation
Train the model on the [MS-COCO dataset](http://cocodataset.org/#download); download the dataset as follows:
cd dataset/coco
./download.sh
## Training
After data preparation, one can start the training step by:
python train.py \
--max_size=1333 \
--scales=800 \
--batch_size=8 \
--model_save_dir=output/
- Set ```export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7``` to specify 8 GPUs for training.
- For more help on arguments:
python train.py --help
**download the pre-trained model:** This sample provides a ResNet-50 pre-trained model converted from Caffe, with the parameters of the batch normalization layers fused. Download the pre-trained model as follows:
sh ./pretrained/download.sh
Set `pretrained_model` to load pre-trained model. In addition, this parameter is used to load trained model when finetuning as well.
**data reader introduction:**
* Data reader is defined in `reader.py`.
* The short side of each image is scaled to `scales`; if the long side then exceeds `max_size`, the long side is scaled down to `max_size` (see the sketch after this list).
* In the training stage, images are horizontally flipped.
* Images in the same batch can be padded to the same size.
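The resizing rule amounts to the following sketch (the function name and defaults here are illustrative, not the reader's actual API):
```
import cv2

def rescale_image(img, target_size=800, max_size=1333):
    # scale the short side to target_size, then cap the long side at max_size
    h, w = img.shape[:2]
    scale = float(target_size) / min(h, w)
    if scale * max(h, w) > max_size:
        scale = float(max_size) / max(h, w)
    return cv2.resize(img, None, fx=scale, fy=scale), scale
```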
**model configuration:**
* Use RoIPooling.
* NMS threshold=0.7. During training, pre\_nms=12000, post\_nms=2000; during test, pre\_nms=6000, post\_nms=1000.
* In generating proposal labels, fg\_fraction=0.25, fg\_thresh=0.5, bg\_thresh\_hi=0.5, bg\_thresh\_lo=0.0.
* In rpn target assignment, rpn\_fg\_fraction=0.5, rpn\_positive\_overlap=0.7, rpn\_negative\_overlap=0.3.
**training strategy:**
* Use momentum optimizer with momentum=0.9.
* Weight decay is 0.0001.
* In the first 500 iterations, the learning rate increases linearly from 0.00333 to 0.01; it is then decayed at iterations 120000 and 160000 with multipliers 0.1 and 0.01. The maximum number of iterations is 180000 (see the sketch after this list).
* In non-base convolutional layers, the learning rate of biases is set to twice the global learning rate.
* In the base convolutional layers, the parameters of the affine layers and the res body are not updated.
* Using 8 Nvidia Tesla V100 GPUs, the total training time is about 40 hours.
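The schedule described above amounts to the following pure-Python sketch; the training code itself builds it with `exponential_with_warmup_decay` from `learning_rate.py`, and the constants below simply mirror the description:
```
def lr_at(step, base_lr=0.01, warmup_iter=500, warmup_factor=1.0 / 3.0,
          boundaries=(120000, 160000), multipliers=(1.0, 0.1, 0.01)):
    if step < warmup_iter:
        # linear warmup from base_lr * warmup_factor (~0.00333) to base_lr (0.01)
        alpha = step / float(warmup_iter)
        return base_lr * (warmup_factor * (1 - alpha) + alpha)
    for boundary, multiplier in zip(boundaries, multipliers):
        if step < boundary:
            return base_lr * multiplier
    return base_lr * multipliers[-1]
```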
The training result is shown below:
<p align="center">
<img src="image/train_loss.jpg" height=500 width=650 hspace='10'/> <br />
Faster RCNN train loss
</p>
* Fluid all padding: each image is padded to 1333\*1333.
* Fluid minibatch padding: images in one batch are padded to the same size, the same as Detectron.
* Fluid no padding: images are not padded.
## Finetuning
Finetuning adapts model weights to a specific task starting from pretrained weights. After setting ```pretrained_model```, one can finetune a model as:
python train.py
--max_size=1333 \
--scales=800 \
--pretrained_model=${path_to_pretrain_model} \
--batch_size=8 \
--model_save_dir=output/
## Evaluation
Evaluation measures the performance of a trained model. This sample provides `eval_coco_map.py`, which uses a COCO-specific mAP metric defined by the [COCO committee](http://cocodataset.org/#detections-eval). To use `eval_coco_map.py`, [cocoapi](https://github.com/cocodataset/cocoapi) is needed. Install the cocoapi:
# COCOAPI=/path/to/clone/cocoapi
git clone https://github.com/cocodataset/cocoapi.git $COCOAPI
cd $COCOAPI/PythonAPI
# if cython is not installed
pip install Cython
# Install into global site-packages
make install
# Alternatively, if you do not have permissions or prefer
# not to install the COCO API into global site-packages
python2 setup.py install --user
`eval_coco_map.py` is the main executor for evaluation; one can start the evaluation step by:
python eval_coco_map.py \
--dataset=coco2017 \
--pretrained_model=${path_to_pretrain_model} \
--batch_size=1 \
--nms_threshold=0.5 \
--score_threshold=0.05
The evaluation result is shown below:
<p align="center">
<img src="image/mAP.jpg" height=500 width=650 hspace='10'/> <br />
Faster RCNN mAP
</p>
| Model | Batch size | Max iteration | mAP |
| :------------------------------ | :------------: | :-------------------:|------: |
| Detectron | 8 | 180000 | 0.315 |
| Fluid minibatch padding | 8 | 180000 | 0.314 |
| Fluid all padding | 8 | 180000 | 0.308 |
| Fluid no padding | 6 | 240000 | 0.317 |
* Fluid all padding: each image is padded to 1333\*1333.
* Fluid minibatch padding: images in one batch are padded to the same size, the same as Detectron.
* Fluid no padding: images are not padded.
## Inference and Visualization
Inference is used to get prediction scores or image features from trained models. `infer.py` is the main executor for inference; one can start the inference step by:
python infer.py \
--dataset=coco2017 \
--pretrained_model=${path_to_pretrain_model} \
--image_path=data/COCO17/val2017/ \
--image_name=000000000139.jpg \
--draw_threshold=0.6
The visualized inference results are shown below:
<p align="center">
<img src="image/000000000139.jpg" height=300 width=400 hspace='10'/>
<img src="image/000000127517.jpg" height=300 width=400 hspace='10'/>
<img src="image/000000203864.jpg" height=300 width=400 hspace='10'/>
<img src="image/000000515077.jpg" height=300 width=400 hspace='10'/> <br />
Faster RCNN Visualization Examples
</p>
# Faster RCNN Object Detection
---
## Table of Contents
- [Installation](#installation)
- [Introduction](#introduction)
- [Data preparation](#data-preparation)
- [Training](#training)
- [Finetuning](#finetuning)
- [Evaluation](#evaluation)
- [Inference and visualization](#inference-and-visualization)
- [Appendix](#appendix)
## Installation
Running the sample code in this directory requires PaddlePaddle Fluid v1.0.0 or later. If the PaddlePaddle in your environment is lower than this version, please update it following the instructions in the [installation document](http://www.paddlepaddle.org/documentation/docs/zh/0.15.0/beginners_guide/install/install_doc.html#paddlepaddle).
## Introduction
[Faster RCNN](https://arxiv.org/abs/1506.01497) is a typical two-stage object detector. As shown in the figure below, the overall network consists of four main parts:
<p align="center">
<img src="image/Faster_RCNN.jpg" height=400 width=400 hspace='10'/> <br />
Faster RCNN object detection model
</p>
1. Base convolutional layer. As a CNN-based object detection method, Faster RCNN first extracts the image's feature maps with a set of base convolutional networks; the feature maps are shared by the subsequent RPN layer and fully connected layers. This example uses [ResNet-50](https://arxiv.org/abs/1512.03385) as the base convolutional layer.
2. Region Proposal Network (RPN). The RPN generates candidate regions (proposals). It obtains a set of anchors from fixed sizes and ratios, classifies each anchor as foreground or background with softmax, and then refines the anchors by box regression to obtain precise proposals.
3. RoI pooling. This layer takes the feature maps and proposals, maps the proposals onto the feature maps, pools them into region feature maps of a uniform size, and feeds them to fully connected layers for classification.
4. Detection layer. The region feature maps are used to compute the class of each proposal, and box regression is applied once more to obtain the final precise position of the detection box.
## Data preparation
Training is performed on the [MS-COCO dataset](http://cocodataset.org/#download); download the dataset as follows:
cd dataset/coco
./download.sh
## Training
After data preparation, start training as follows:
python train.py \
--max_size=1333 \
--scales=800 \
--batch_size=8 \
--model_save_dir=output/ \
--pretrained_model=${path_to_pretrain_model}
- Set `export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7` to train on 8 GPUs.
- For more arguments, run:
python train.py --help
**Download the pre-trained model:** This example provides a ResNet-50 pre-trained model converted from Caffe, with the parameters of the batch normalization layers fused. Download the pre-trained model with:
sh ./pretrained/download.sh
Load the pre-trained model by setting `pretrained_model`. The same setting is also used to load a trained model when finetuning.
**Data reader:** The data reader is defined in `reader.py`. The short side of each image is scaled proportionally to `scales`; if the long side then exceeds `max_size`, the long side is scaled down to `max_size`. In the training stage, images are horizontally flipped. Padding the images within one batch to the same size is supported.
**Model configuration:**
* Use RoIPooling.
* During training, pre\_nms=12000 and post\_nms=2000; during testing, pre\_nms=6000 and post\_nms=1000. The NMS threshold is 0.7.
* In generating proposal labels, fg\_fraction=0.25, fg\_thresh=0.5, bg\_thresh\_hi=0.5, bg\_thresh\_lo=0.0.
* In RPN anchor assignment, rpn\_fg\_fraction=0.5, rpn\_positive\_overlap=0.7, rpn\_negative\_overlap=0.3.
The training results are shown below:
<p align="center">
<img src="image/train_loss.jpg" height=500 width=650 hspace='10'/> <br />
Faster RCNN training loss
</p>
* Fluid all padding: each image is padded to 1333\*1333.
* Fluid minibatch padding: images within one batch are padded to the same size, the same treatment as Detectron.
* Fluid no padding: images are not padded.
**Training strategy:**
* Train Faster RCNN with the momentum optimizer, momentum=0.9.
* Weight decay is 0.0001. In the first 500 iterations, the learning rate increases linearly from 0.00333 to 0.01; it is then decayed at iterations 120000 and 160000 with multipliers 0.1 and 0.01, and training runs for at most 180000 iterations.
* In non-base convolutional layers, the learning rate of convolution biases is twice the global learning rate.
* In the base convolutional layers, the parameters of the affine layers and the res2 layers are not updated.
* Using 8 Nvidia Tesla V100 GPUs in parallel, the total training time is about 40 hours.
## Evaluation
Evaluation measures the performance metrics of a trained model. This example uses the [official COCO evaluation](http://cocodataset.org/#detections-eval); [cocoapi](https://github.com/cocodataset/cocoapi) needs to be downloaded first.
# COCOAPI=/path/to/clone/cocoapi
git clone https://github.com/cocodataset/cocoapi.git $COCOAPI
cd $COCOAPI/PythonAPI
# if cython is not installed
pip install Cython
# Install into global site-packages
make install
# Alternatively, if you do not have permissions or prefer
# not to install the COCO API into global site-packages
python2 setup.py install --user
`eval_coco_map.py` is the main executor for evaluation; an example invocation:
python eval_coco_map.py \
--dataset=coco2017 \
--pretrained_model=${path_to_pretrain_model} \
--batch_size=1 \
--nms_threshold=0.5 \
--score_threshold=0.05
The evaluation results are shown below:
<p align="center">
<img src="image/mAP.jpg" height=500 width=650 hspace='10'/> <br />
Faster RCNN mAP
</p>
| Model | Batch size | Max iteration | mAP |
| :------------------------------ | :------------: | :------------------: |------: |
| Detectron | 8 | 180000 | 0.315 |
| Fluid minibatch padding | 8 | 180000 | 0.314 |
| Fluid all padding | 8 | 180000 | 0.308 |
| Fluid no padding | 6 | 240000 | 0.317 |
* Fluid all padding: each image is padded to 1333\*1333.
* Fluid minibatch padding: images within one batch are padded to the same size, the same treatment as Detectron.
* Fluid no padding: images are not padded.
## Inference and visualization
Inference retrieves the objects in an image together with their classes. `infer.py` is the main executor; an example invocation:
python infer.py \
--dataset=coco2017 \
--pretrained_model=${path_to_pretrain_model} \
--image_path=data/COCO17/val2017/ \
--image_name=000000000139.jpg \
--draw_threshold=0.6
The visualized predictions are shown below:
<p align="center">
<img src="image/000000000139.jpg" height=300 width=400 hspace='10'/>
<img src="image/000000127517.jpg" height=300 width=400 hspace='10'/>
<img src="image/000000203864.jpg" height=300 width=400 hspace='10'/>
<img src="image/000000515077.jpg" height=300 width=400 hspace='10'/> <br />
Faster RCNN visualization examples
</p>
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import numpy as np
import argparse
import functools
from eval_helper import get_nmsed_box
from eval_helper import get_dt_res
import paddle
import paddle.fluid as fluid
import reader
from utility import print_arguments, parse_args
# A special mAP metric for COCO dataset, which averages AP in different IoUs.
# To use this eval_coco_map.py, [cocoapi](https://github.com/cocodataset/cocoapi) is needed.
import models.model_builder as model_builder
import models.resnet as resnet
import json
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import numpy as np
import paddle.fluid as fluid
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant
......
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
cd "$DIR"
# Download the data.
echo "Downloading..."
wget http://paddlemodels.bj.bcebos.com/faster_rcnn/imagenet_resnet50_fusebn.tar.gz
echo "Extracting..."
tar -xf imagenet_resnet50_fusebn.tar.gz
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import time
import numpy as np
import argparse
import functools
import shutil
import cPickle
from utility import add_arguments, print_arguments
from utility import parse_args, add_arguments, print_arguments
import paddle
import paddle.fluid as fluid
......@@ -16,50 +27,12 @@ import models.model_builder as model_builder
import models.resnet as resnet
from learning_rate import exponential_with_warmup_decay
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
# ENV
add_arg('parallel', bool, True, "Whether to run with ParallelExecutor.")
add_arg('use_gpu', bool, True, "Whether use GPU.")
add_arg('model_save_dir', str, 'model', "The path to save model.")
add_arg('pretrained_model', str, 'imagenet_resnet50_fusebn', "The init model path.")
add_arg('dataset', str, 'coco2017', "coco2014, coco2017, and pascalvoc.")
add_arg('data_dir', str, 'data/COCO17', "data directory")
add_arg('skip_reader', bool, False, "Whether to skip data reader.")
add_arg('use_profile', bool, False, "Whether to use profiler tool.")
add_arg('class_num', int, 81, "Class number.")
add_arg('use_pyreader', bool, False, "Whether to use py_reader to feed data.")
# SOLVER
add_arg('learning_rate', float, 0.01, "Learning rate.")
add_arg('num_iteration', int, 10, "Epoch number.")
# RPN
add_arg('anchor_sizes', int, [32,64,128,256,512], "The size of anchors.")
add_arg('aspect_ratios', float, [0.5,1.0,2.0], "The ratio of anchors.")
add_arg('variance', float, [1.,1.,1.,1.], "The variance of anchors.")
add_arg('rpn_stride', float, 16., "Stride of the feature map that RPN is attached.")
# FAST RCNN
# TRAIN TEST
add_arg('batch_size', int, 1, "Minibatch size.")
add_arg('max_size', int, 1333, "The max resized image size.")
add_arg('scales', int, [800], "The resized image height.")
add_arg('batch_size_per_im',int, 512, "fast rcnn head batch size")
add_arg('mean_value', float, [102.9801, 115.9465, 122.7717], "pixel mean")
add_arg('debug', bool, False, "Debug mode")
#yapf: enable
def train(cfg):
batch_size = cfg.batch_size
learning_rate = cfg.learning_rate
image_shape = [3, cfg.max_size, cfg.max_size]
num_iterations = cfg.num_iteration
if cfg.debug:
fluid.default_startup_program().random_seed = 1000
fluid.default_main_program().random_seed = 1000
import random
random.seed(0)
np.random.seed(0)
num_iterations = cfg.max_iter
devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
devices_num = len(devices.split(","))
......@@ -72,21 +45,22 @@ def train(cfg):
use_random=False)
model.build_model(image_shape)
loss_cls, loss_bbox, rpn_cls_loss, rpn_reg_loss = model.loss()
loss_cls.persistable=True
loss_bbox.persistable=True
rpn_cls_loss.persistable=True
rpn_reg_loss.persistable=True
loss_cls.persistable = True
loss_bbox.persistable = True
rpn_cls_loss.persistable = True
rpn_reg_loss.persistable = True
loss = loss_cls + loss_bbox + rpn_cls_loss + rpn_reg_loss
boundaries = [120000, 160000]
values = [learning_rate, learning_rate*0.1, learning_rate*0.01]
values = [learning_rate, learning_rate * 0.1, learning_rate * 0.01]
optimizer = fluid.optimizer.Momentum(
learning_rate=exponential_with_warmup_decay(learning_rate=learning_rate,
learning_rate=exponential_with_warmup_decay(
learning_rate=learning_rate,
boundaries=boundaries,
values=values,
warmup_iter=500,
warmup_factor=1.0/3.0),
warmup_factor=1.0 / 3.0),
regularization=fluid.regularizer.L2Decay(0.0001),
momentum=0.9)
optimizer.minimize(loss)
......@@ -98,22 +72,33 @@ def train(cfg):
exe.run(fluid.default_startup_program())
if cfg.pretrained_model:
def if_exist(var):
return os.path.exists(os.path.join(cfg.pretrained_model, var.name))
fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)
if cfg.parallel:
train_exe = fluid.ParallelExecutor(
use_cuda=bool(cfg.use_gpu), loss_name=loss.name)
assert cfg.batch_size % devices_num == 0, \
"batch_size = %d, devices_num = %d" %(cfg.batch_size, devices_num)
batch_size_per_dev = cfg.batch_size / devices_num
if cfg.use_pyreader:
train_reader = reader.train(cfg, batch_size=1, shuffle=not cfg.debug)
train_reader = reader.train(
cfg,
batch_size=batch_size_per_dev,
total_batch_size=cfg.batch_size,
padding_total=cfg.padding_minibatch,
shuffle=False)
py_reader = model.py_reader
py_reader.decorate_paddle_reader(train_reader)
else:
train_reader = reader.train(cfg, batch_size=cfg.batch_size, shuffle=not cfg.debug)
feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())
train_reader = reader.train(
cfg, batch_size=cfg.batch_size, shuffle=False)
feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())
fetch_list = [loss, loss_cls, loss_bbox, rpn_cls_loss, rpn_reg_loss]
......@@ -124,22 +109,27 @@ def train(cfg):
for batch_id in range(iterations):
start_time = time.time()
data = train_reader().next()
data = next(train_reader())
end_time = time.time()
reader_time.append(end_time - start_time)
start_time = time.time()
losses = train_exe.run(fetch_list=[v.name for v in fetch_list],
feed=feeder.feed(data))
if cfg.parallel:
losses = train_exe.run(fetch_list=[v.name for v in fetch_list],
feed=feeder.feed(data))
else:
losses = exe.run(fluid.default_main_program(),
fetch_list=[v.name for v in fetch_list],
feed=feeder.feed(data))
end_time = time.time()
run_time.append(end_time - start_time)
total_images += data[0][0].shape[0]
total_images += len(data)
lr = np.array(fluid.global_scope().find_var('learning_rate').get_tensor())
print("Batch {:d}, lr {:.6f}, loss {:.6f} ".format(
batch_id, lr[0], losses[0][0]))
lr = np.array(fluid.global_scope().find_var('learning_rate')
.get_tensor())
print("Batch {:d}, lr {:.6f}, loss {:.6f} ".format(batch_id, lr[0],
losses[0][0]))
return reader_time, run_time, total_images
def run_pyreader(iterations):
reader_time = [0]
run_time = []
......@@ -149,13 +139,19 @@ def train(cfg):
try:
for batch_id in range(iterations):
start_time = time.time()
losses = train_exe.run(fetch_list=[v.name for v in fetch_list])
if cfg.parallel:
losses = train_exe.run(
fetch_list=[v.name for v in fetch_list])
else:
losses = exe.run(fluid.default_main_program(),
fetch_list=[v.name for v in fetch_list])
end_time = time.time()
run_time.append(end_time - start_time)
total_images += devices_num
lr = np.array(fluid.global_scope().find_var('learning_rate').get_tensor())
print("Batch {:d}, lr {:.6f}, loss {:.6f} ".format(
batch_id, lr[0], losses[0][0]))
lr = np.array(fluid.global_scope().find_var('learning_rate')
.get_tensor())
print("Batch {:d}, lr {:.6f}, loss {:.6f} ".format(batch_id, lr[
0], losses[0][0]))
except fluid.core.EOFException:
py_reader.reset()
......@@ -167,20 +163,23 @@ def train(cfg):
run_func(2)
# profiling
start = time.time()
if cfg.use_profile:
use_profile = False
if use_profile:
with profiler.profiler('GPU', 'total', '/tmp/profile_file'):
reader_time, run_time, total_images = run(num_iterations)
reader_time, run_time, total_images = run_func(num_iterations)
else:
reader_time, run_time, total_images = run_func(num_iterations)
end = time.time()
total_time = end - start
print("Total time: {0}, reader time: {1} s, run time: {2} s, images/s: {3}".format(
total_time, np.sum(reader_time), np.sum(run_time), total_images / total_time))
print("Total time: {0}, reader time: {1} s, run time: {2} s, images/s: {3}".
format(total_time,
np.sum(reader_time),
np.sum(run_time), total_images / total_time))
if __name__ == '__main__':
args = parser.parse_args()
args = parse_args()
print_arguments(args)
data_args = reader.Settings(args)
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -150,6 +150,8 @@ def coco(settings,
else:
for roidb in roidbs:
if settings.image_name not in roidb['image']:
continue
im, im_info, im_id = roidb_reader(roidb, mode)
batch_out = [(im, im_info, im_id)]
yield batch_out
......
......@@ -26,7 +26,6 @@ from __future__ import print_function
from __future__ import unicode_literals
import copy
import cPickle as pickle
import logging
import numpy as np
import os
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import sys
import numpy as np
import argparse
import functools
import time
import shutil
import cPickle
from utility import parse_args, print_arguments, SmoothedValue
import paddle
......@@ -117,7 +132,7 @@ def train(cfg):
iter_id, lr[0],
smoothed_loss.get_median_value(
), start_time - prev_start_time))
#print('cls_loss ', losses[1][0], ' reg_loss ', losses[2][0], ' loss_cls ', losses[3][0], ' loss_bbox ', losses[4][0])
sys.stdout.flush()
if (iter_id + 1) % cfg.snapshot_stride == 0:
save_model("model_iter{}".format(iter_id))
except fluid.core.EOFException:
......@@ -143,7 +158,7 @@ def train(cfg):
print("Iter {:d}, lr {:.6f}, loss {:.6f}, time {:.5f}".format(
iter_id, lr[0],
smoothed_loss.get_median_value(), start_time - prev_start_time))
#print('cls_loss ', losses[1][0], ' reg_loss ', losses[2][0], ' loss_cls ', losses[3][0], ' loss_bbox ', losses[4][0])
sys.stdout.flush()
if (iter_id + 1) % cfg.snapshot_stride == 0:
save_model("model_iter{}".format(iter_id))
if (iter_id + 1) == cfg.max_iter:
......
"""Contains common utility functions."""
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
......@@ -12,6 +11,9 @@
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
"""
Contains common utility functions.
"""
from __future__ import absolute_import
from __future__ import division
......@@ -83,8 +85,7 @@ class SmoothedValue(object):
def parse_args():
"""
return all args
"""return all args
"""
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
......
......@@ -12,8 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import os
import six
import argparse
import functools
import matplotlib
......@@ -40,7 +44,9 @@ add_arg('use_gpu', bool, True, "Whether to use GPU to train.")
def loss(x, label):
return fluid.layers.mean(x * (label - 0.5))
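# sigmoid_cross_entropy_with_logits applies the sigmoid internally, giving the standard, numerically stable binary cross-entropy GAN loss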
return fluid.layers.mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=x, label=label))
def train(args):
......@@ -67,7 +73,10 @@ def train(args):
g_program_test = dg_program.clone(for_test=True)
dg_logit = D_cond(g_img, conditions)
dg_loss = loss(dg_logit, 1)
dg_loss = loss(
dg_logit,
fluid.layers.fill_constant_batch_size_like(
input=noise, dtype='float32', shape=[-1, 1], value=1.0))
opt = fluid.optimizer.Adam(learning_rate=LEARNING_RATE)
......@@ -97,7 +106,7 @@ def train(args):
noise_data = np.random.uniform(
low=-1.0, high=1.0,
size=[args.batch_size, NOISE_SIZE]).astype('float32')
real_image = np.array(map(lambda x: x[0], data)).reshape(
real_image = np.array(list(map(lambda x: x[0], data))).reshape(
-1, 784).astype('float32')
conditions_data = np.array([x[1] for x in data]).reshape(
[-1, 1]).astype("float32")
......@@ -133,7 +142,7 @@ def train(args):
d_loss_np = [d_loss_1[0][0], d_loss_2[0][0]]
for _ in xrange(NUM_TRAIN_TIMES_OF_DG):
for _ in six.moves.xrange(NUM_TRAIN_TIMES_OF_DG):
noise_data = np.random.uniform(
low=-1.0, high=1.0,
size=[args.batch_size, NOISE_SIZE]).astype('float32')
......@@ -154,7 +163,7 @@ def train(args):
total_images = np.concatenate([real_image, generated_images])
fig = plot(total_images)
msg = "Epoch ID={0}\n Batch ID={1}\n D-Loss={2}\n DG-Loss={3}\n gen={4}".format(
pass_id, batch_id, d_loss_np, dg_loss_np,
pass_id, batch_id, np.mean(d_loss_np), dg_loss_np,
check(generated_images))
print(msg)
plt.title(msg)
......
......@@ -12,11 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import os
import argparse
import functools
import matplotlib
import six
import numpy as np
import paddle
import paddle.fluid as fluid
......@@ -32,15 +36,17 @@ LEARNING_RATE = 2e-4
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 121, "Minibatch size.")
add_arg('batch_size', int, 128, "Minibatch size.")
add_arg('epoch', int, 20, "The number of epoched to be trained.")
add_arg('output', str, "./output", "The directory the model and the test result to be saved to.")
add_arg('output', str, "./output_dcgan", "The directory the model and the test result to be saved to.")
add_arg('use_gpu', bool, True, "Whether to use GPU to train.")
# yapf: enable
def loss(x, label):
return fluid.layers.mean(x * (label - 0.5))
return fluid.layers.mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=x, label=label))
def train(args):
......@@ -63,7 +69,10 @@ def train(args):
g_program_test = dg_program.clone(for_test=True)
dg_logit = D(g_img)
dg_loss = loss(dg_logit, 1)
dg_loss = loss(
dg_logit,
fluid.layers.fill_constant_batch_size_like(
input=noise, dtype='float32', shape=[-1, 1], value=1.0))
opt = fluid.optimizer.Adam(learning_rate=LEARNING_RATE)
......@@ -93,7 +102,7 @@ def train(args):
noise_data = np.random.uniform(
low=-1.0, high=1.0,
size=[args.batch_size, NOISE_SIZE]).astype('float32')
real_image = np.array(map(lambda x: x[0], data)).reshape(
real_image = np.array(list(map(lambda x: x[0], data))).reshape(
-1, 784).astype('float32')
real_labels = np.ones(
shape=[real_image.shape[0], 1], dtype='float32')
......@@ -123,7 +132,7 @@ def train(args):
d_loss_np = [d_loss_1[0][0], d_loss_2[0][0]]
for _ in xrange(NUM_TRAIN_TIMES_OF_DG):
for _ in six.moves.xrange(NUM_TRAIN_TIMES_OF_DG):
noise_data = np.random.uniform(
low=-1.0, high=1.0,
size=[args.batch_size, NOISE_SIZE]).astype('float32')
......@@ -139,9 +148,9 @@ def train(args):
fetch_list={g_img})[0]
total_images = np.concatenate([real_image, generated_images])
fig = plot(total_images)
msg = "Epoch ID={0}\n Batch ID={1}\n D-Loss={2}\n DG-Loss={3}\n gen={4}".format(
pass_id, batch_id, d_loss_np, dg_loss_np,
check(generated_images))
msg = "Epoch ID={0} Batch ID={1} D-Loss={2} DG-Loss={3}\n gen={4}".format(
pass_id, batch_id,
np.mean(d_loss_np), dg_loss_np, check(generated_images))
print(msg)
plt.title(msg)
plt.savefig(
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
from utility import get_parent_function_name
......@@ -104,13 +107,13 @@ def D_cond(image, y):
def G_cond(z, y):
s_h, s_w = output_height, output_width
s_h2, s_h4 = int(s_h / 2), int(s_h / 4)
s_w2, s_w4 = int(s_w / 2), int(s_w / 4)
s_h2, s_h4 = int(s_h // 2), int(s_h // 4)
s_w2, s_w4 = int(s_w // 2), int(s_w // 4)
yb = fluid.layers.reshape(y, [-1, y_dim, 1, 1]) #NCHW
z = fluid.layers.concat([z, y], 1)
h0 = bn(fc(z, gfc_dim / 2), act='relu')
h0 = bn(fc(z, gfc_dim // 2), act='relu')
h0 = fluid.layers.concat([h0, y], 1)
h1 = bn(fc(h0, gf_dim * 2 * s_h4 * s_w4), act='relu')
......@@ -134,8 +137,8 @@ def D(x):
def G(x):
x = bn(fc(x, gfc_dim))
x = bn(fc(x, gf_dim * 2 * img_dim / 4 * img_dim / 4))
x = fluid.layers.reshape(x, [-1, gf_dim * 2, img_dim / 4, img_dim / 4])
x = bn(fc(x, gf_dim * 2 * img_dim // 4 * img_dim // 4))
x = fluid.layers.reshape(x, [-1, gf_dim * 2, img_dim // 4, img_dim // 4])
x = deconv(x, gf_dim * 2, act='relu', output_size=[14, 14])
x = deconv(x, 1, filter_size=5, padding=2, act='tanh', output_size=[28, 28])
x = fluid.layers.reshape(x, shape=[-1, 28 * 28])
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import distutils.util
import numpy as np
import inspect
import matplotlib
import six
matplotlib.use('agg')
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
......@@ -54,7 +58,7 @@ def print_arguments(args):
:type args: argparse.Namespace
"""
print("----------- Configuration Arguments -----------")
for arg, value in sorted(vars(args).iteritems()):
for arg, value in sorted(six.iteritems(vars(args))):
print("%s: %s" % (arg, value))
print("------------------------------------------------")
......
......@@ -21,21 +21,23 @@ TODO
The horse2zebra training set contains 1069 horse images and 1336 zebra images; the test set contains 121 horse images and 141 zebra images.
After the data is downloaded and processed, it is organized under the following paths:
After the data is downloaded and processed, organize it into the following directory structure:
```
horse2zebra/
|-- testA
|-- testA.txt
|-- testB
|-- testB.txt
|-- trainA
|-- trainA.txt
|-- trainB
`-- trainB.txt
data
|-- horse2zebra
| |-- testA
| |-- testA.txt
| |-- testB
| |-- testB.txt
| |-- trainA
| |-- trainA.txt
| |-- trainB
| `-- trainB.txt
```
In the files above, 'testA' is the folder of horse test images, 'testB' is the folder of zebra test images, and 'testA.txt' and 'testB.txt' are the path list files of the horse and zebra test images respectively, in the following format:
In the layout above, the `data` folder must be placed in the same directory as the training script `train.py`. `testA` is the folder of horse test images, `testB` is the folder of zebra test images, and `testA.txt` and `testB.txt` are the path list files of the horse and zebra test images respectively, in the following format:
```
testA/n02381460_9243.jpg
......@@ -53,7 +55,7 @@ testA/n02381460_9245.jpg
Train on a single GPU:
```
env CUDA_VISIABLE_DEVICES=0 python train.py
env CUDA_VISIBLE_DEVICES=0 python train.py
```
Run `python train.py --help` for more usage information and detailed argument descriptions.
......@@ -72,7 +74,7 @@ env CUDA_VISIABLE_DEVICES=0 python train.py
```
env CUDA_VISIBLE_DEVICES=0 python infer.py \
--model_path="models/1" --input="./data/inputA/*" \
--init_model="models/1" --input="./data/inputA/*" \
--output="./output"
```
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from PIL import Image
import numpy as np
from itertools import izip
A_LIST_FILE = "./data/horse2zebra/trainA.txt"
B_LIST_FILE = "./data/horse2zebra/trainB.txt"
......@@ -70,11 +72,3 @@ def b_test_reader():
Reader of images with B style for test.
"""
return reader_creater(B_TEST_LIST_FILE, cycle=False, return_name=True)
if __name__ == "__main__":
for A, B in izip(a_test_reader()(), a_test_reader()()):
print A[0].shape
print A[1]
print B[0].shape
print B[1]
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import data_reader
import os
import random
......@@ -9,7 +12,6 @@ import paddle.fluid as fluid
import numpy as np
from paddle.fluid import core
from trainer import *
from itertools import izip
from scipy.misc import imsave
import paddle.fluid.profiler as profiler
from utility import add_arguments, print_arguments, ImagePool
......@@ -66,7 +68,7 @@ def train(args):
if not os.path.exists(out_path):
os.makedirs(out_path)
i = 0
for data_A, data_B in izip(A_test_reader(), B_test_reader()):
for data_A, data_B in zip(A_test_reader(), B_test_reader()):
A_name = data_A[1]
B_name = data_B[1]
tensor_A = core.LoDTensor()
......@@ -114,7 +116,7 @@ def train(args):
exe, out_path + "/d_a", main_program=d_A_trainer.program)
fluid.io.save_persistables(
exe, out_path + "/d_b", main_program=d_B_trainer.program)
print "saved checkpoint to [%s]" % out_path
print("saved checkpoint to {}".format(out_path))
sys.stdout.flush()
def init_model():
......@@ -128,7 +130,7 @@ def train(args):
exe, args.init_model + "/d_a", main_program=d_A_trainer.program)
fluid.io.load_persistables(
exe, args.init_model + "/d_b", main_program=d_B_trainer.program)
print "Load model from [%s]" % args.init_model
print("Load model from {}".format(args.init_model))
if args.init_model:
init_model()
......@@ -136,8 +138,8 @@ def train(args):
for epoch in range(args.epoch):
batch_id = 0
for i in range(max_images_num):
data_A = A_reader.next()
data_B = B_reader.next()
data_A = next(A_reader)
data_B = next(B_reader)
tensor_A = core.LoDTensor()
tensor_B = core.LoDTensor()
tensor_A.set(data_A, place)
......@@ -174,9 +176,9 @@ def train(args):
feed={"input_A": tensor_A,
"fake_pool_A": fake_pool_A})
print "epoch[%d]; batch[%d]; g_A_loss: %s; d_B_loss: %s; g_B_loss: %s; d_A_loss: %s;" % (
print("epoch{}; batch{}; g_A_loss: {}; d_B_loss: {}; g_B_loss: {}; d_A_loss: {};".format(
epoch, batch_id, g_A_loss[0], d_B_loss[0], g_B_loss[0],
d_A_loss[0])
d_A_loss[0]))
sys.stdout.flush()
batch_id += 1
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from model import *
import paddle.fluid as fluid
......
......@@ -17,6 +17,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import distutils.util
import six
import random
import glob
import numpy as np
......@@ -39,7 +40,7 @@ def print_arguments(args):
:type args: argparse.Namespace
"""
print("----------- Configuration Arguments -----------")
for arg, value in sorted(vars(args).iteritems()):
for arg, value in sorted(six.iteritems(vars(args))):
print("%s: %s" % (arg, value))
print("------------------------------------------------")
......
......@@ -8,7 +8,7 @@ import os
import cv2
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle
from icnet import icnet
from utils import add_arguments, print_arguments, get_feeder_data
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
......@@ -111,10 +111,10 @@ def infer(args):
for line in open(args.images_list):
image_file = args.images_path + "/" + line.strip()
filename = os.path.basename(image_file)
image = paddle.image.load_image(
image = paddle.dataset.image.load_image(
image_file, is_color=True).astype("float32")
image -= IMG_MEAN
img = paddle.image.to_chw(image)[np.newaxis, :]
img = paddle.dataset.image.to_chw(image)[np.newaxis, :]
image_t = fluid.core.LoDTensor()
image_t.set(img, place)
result = exe.run(inference_program,
......
......@@ -14,7 +14,7 @@
## Installation
Running the sample code in this directory requires PaddlePaddle Fluid v0.13.0 or later. If the PaddlePaddle in your environment is lower than this version, please update it following the instructions in the [installation document](http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/pip_install_cn.html).
Running the sample code in this directory requires PaddlePaddle Fluid v0.13.0 or later. If the PaddlePaddle in your environment is lower than this version, please update it following the instructions in the installation document.
## Data preparation
......
......@@ -8,7 +8,7 @@ import sys
import os
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle
def test_model(exe, test_program, fetch_list, test_reader, feeder):
......
......@@ -52,7 +52,7 @@ In this example, we launched 4 parameter server instances and 4 trainer instance
1. launch trainer process
``` python
PADDLE_TRAINING_ROLE=PSERVER \
PADDLE_TRAINING_ROLE=TRAINER \
PADDLE_TRAINERS=4 \
PADDLE_PSERVER_IPS=192.168.0.100,192.168.0.101,192.168.0.102,192.168.0.103 \
PADDLE_TRAINER_ID=0 \
......@@ -110,4 +110,4 @@ Training acc1 curves
### Performance
TBD
\ No newline at end of file
TBD
......@@ -22,6 +22,7 @@ import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import six
import sys
sys.path.append("..")
import models
......@@ -172,7 +173,7 @@ def dist_transpile(trainer_id, args, train_prog, startup_prog):
def test_parallel(exe, test_args, args, test_prog, feeder):
acc_evaluators = []
for i in xrange(len(test_args[2])):
for i in six.moves.xrange(len(test_args[2])):
acc_evaluators.append(fluid.metrics.Accuracy())
to_fetch = [v.name for v in test_args[2]]
......@@ -291,7 +292,7 @@ def train_parallel(train_args, test_args, args, train_prog, test_prog,
def print_arguments(args):
print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()):
for arg, value in sorted(six.iteritems(vars(args))):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
......@@ -307,7 +308,7 @@ def print_paddle_envs():
print('----------- Configuration envs -----------')
for k in os.environ:
if "PADDLE_" in k:
print "ENV %s:%s" % (k, os.environ[k])
print("ENV %s:%s" % (k, os.environ[k]))
print('------------------------------------------------')
......
......@@ -140,7 +140,7 @@ def _reader_creator(file_list,
# distributed mode if the env var `PADDLE_TRAINING_ROLE` exists
trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
trainer_count = int(os.getenv("PADDLE_TRAINERS", "1"))
per_node_lines = len(full_lines) / trainer_count
per_node_lines = len(full_lines) // trainer_count
lines = full_lines[trainer_id * per_node_lines:(trainer_id + 1)
* per_node_lines]
print(
......
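The change to floor division matters under Python 3, where `/` yields a float and would break the slice; a self-contained sketch with made-up values:
```python
# Shard a 10-element file list evenly across 4 trainers (values hypothetical).
full_lines = ["sample_%d" % i for i in range(10)]
trainer_id, trainer_count = 1, 4
per_node_lines = len(full_lines) // trainer_count  # 2; plain `/` gives 2.5 on Py3
lines = full_lines[trainer_id * per_node_lines:(trainer_id + 1) * per_node_lines]
print(lines)  # ['sample_2', 'sample_3']
```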
......@@ -33,7 +33,7 @@ add_arg('lr', float, 0.1, "set learning rate.")
add_arg('lr_strategy', str, "piecewise_decay", "Set the learning rate decay strategy.")
add_arg('model', str, "SE_ResNeXt50_32x4d", "Set the network to use.")
add_arg('enable_ce', bool, False, "If set True, enable continuous evaluation job.")
add_arg('data_dir' str, "./data/ILSVRC2012", "The ImageNet dataset root dir.")
add_arg('data_dir', str, "./data/ILSVRC2012", "The ImageNet dataset root dir.")
# yapf: enable
model_list = [m for m in dir(models) if "__" not in m]
......
......@@ -4,7 +4,6 @@ import random
import cPickle
import functools
import numpy as np
#import paddle.v2 as paddle
import paddle
from PIL import Image, ImageEnhance
......
#!/bin/bash
DATA_PATH=$HOME/.cache/paddle/dataset/wmt16
if [ ! -d $DATA_PATH/en_10000.dict ] ; then
if [ ! -e $DATA_PATH/en_10000.dict ] ; then
python -c 'import paddle;paddle.dataset.wmt16.train(10000, 10000, "en")().next()'
tar -zxf $DATA_PATH/wmt16.tar.gz -C $DATA_PATH
fi
......
......@@ -63,7 +63,7 @@ The WMT dataset is the recognized mainstream dataset in machine translation; the WMT EN-DE and EN-FR
#### WMT English-German Translation Data
The [WMT'16 EN-DE dataset](http://www.statmt.org/wmt16/translation-task.html) is a medium-scale dataset. Following the paper, we use BPE-encoded data for EN-DE, which better handles the out-of-vocabulary (OOV) problem[4]. The BPE data can be downloaded from [here](https://github.com/google/seq2seq/blob/master/docs/data.md) (to apply BPE encoding to custom data, see the preprocessing steps [here](https://github.com/rsennrich/subword-nmt)). After downloading and extracting, `train.tok.clean.bpe.32000.en` and `train.tok.clean.bpe.32000.de` are the BPE training data (parallel corpora for English and German respectively, tokenized and BPE-processed), `newstest2013.tok.bpe.32000.en` and `newstest2013.tok.bpe.32000.de` etc. are the test data (`newstest2013.tok.en` and `newstest2013.tok.de` etc. are the corresponding test data without BPE), and `vocab.bpe.32000` is the vocabulary file (shared by the source and target languages).
The [WMT'16 EN-DE dataset](http://www.statmt.org/wmt16/translation-task.html) is a medium-scale dataset. Following the paper, we use BPE-encoded data for EN-DE, which better handles the out-of-vocabulary (OOV) problem[4]. The BPE data can be downloaded from [here](https://github.com/google/seq2seq/blob/master/docs/data.md) (to apply BPE encoding to custom data, see the preprocessing steps [here](https://github.com/rsennrich/subword-nmt)). After downloading and extracting, `train.tok.clean.bpe.32000.en` and `train.tok.clean.bpe.32000.de` are the BPE training data (parallel corpora for English and German respectively, tokenized and BPE-processed), `newstest2016.tok.bpe.32000.en` and `newstest2016.tok.bpe.32000.de` etc. are the test data (`newstest2016.tok.en` and `newstest2016.tok.de` etc. are the corresponding test data without BPE), and `vocab.bpe.32000` is the vocabulary file (shared by the source and target languages).
Since the data-reading script `reader.py` in this example expects samples formatted as `\t`-separated source and target sentence pairs (words within a sentence are space-separated by default), the source-to-target parallel corpus files need to be merged into a single file; the merge can be done with the following command:
```sh
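# The original merge command is collapsed by this diff view; a hedged sketch,
# assuming GNU coreutils `paste` joins the two corpora with a tab:
paste -d '\t' train.tok.clean.bpe.32000.en train.tok.clean.bpe.32000.de > train.tok.clean.bpe.32000.en-de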
......@@ -91,7 +91,7 @@ python -u train.py \
--train_file_pattern data/train.tok.clean.bpe.32000.en-de \
--token_delimiter ' ' \
--use_token_batch True \
--batch_size 3200 \
--batch_size 4096 \
--sort_type pool \
--pool_size 200000
```
......@@ -100,7 +100,7 @@ python -u train.py \
python train.py --help
```
More training-related parameters are defined in `ModelHyperParams` and `TrainTaskConfig` in `config.py`; `ModelHyperParams` defines model hyperparameters such as the embedding dimension, while `TrainTaskConfig` defines training parameters such as the number of warmup steps. These default to the base-model configuration in the Transformer paper and can be modified in that script if adjustment is needed. They can also be set on the command line when launching the training script; the passed-in settings are merged with and override those in `config.py`. For example, the big model from the Transformer paper can be trained with the following command:
More training-related parameters are defined in `ModelHyperParams` and `TrainTaskConfig` in `config.py`; `ModelHyperParams` defines model hyperparameters such as the embedding dimension, while `TrainTaskConfig` defines training parameters such as the number of warmup steps. These default to the base-model configuration in the Transformer paper and can be modified in that script if adjustment is needed. They can also be set on the command line when launching the training script; the passed-in settings are merged with and override those in `config.py`. For example, the big model from the Transformer paper can be trained with the following command (reduce the batch size appropriately if GPU memory is insufficient):
```sh
python -u train.py \
......@@ -117,22 +117,23 @@ python -u train.py \
n_head 16 \
d_model 1024 \
d_inner_hid 4096 \
dropout 0.3
n_head 16 \
prepostprocess_dropout 0.3
```
For more detailed information on these parameters, refer to the comments in `config.py`. For the EN-FR data, training is similar to EN-DE training: change the vocabulary and data files in the command to the paths of the corresponding EN-FR files. Also note that since tokens in the EN-FR data are not space-separated, the `token_delimiter` argument must be set to `--token_delimiter '\x01'`.
Training uses all GPUs by default; the GPUs used can be set via the `CUDA_VISIBLE_DEVICES` environment variable. Training can also run on CPU only (set via the `--device CPU` argument), though relatively slowly. During training, the model is saved to the directory given by `model_dir` at the end of each epoch, and also every 1000 iterations within an epoch; each iteration prints a log like the following to standard output:
Training uses all GPUs by default; the GPUs used can be set via the `CUDA_VISIBLE_DEVICES` environment variable. Training can also run on CPU only (set via the `--device CPU` argument), though relatively slowly. During training, the model is saved to the directory given by `model_dir` every fixed number of iterations (set via the `save_freq` argument, 10000 by default); a checkpoint is also saved to the directory given by `ckpt_dir` at the end of each epoch. Each iteration prints a log like the following to standard output:
```txt
epoch: 0, batch: 0, sum loss: 258793.343750, avg loss: 11.069005, ppl: 64151.644531
epoch: 0, batch: 1, sum loss: 256140.718750, avg loss: 11.059616, ppl: 63552.148438
epoch: 0, batch: 2, sum loss: 258931.093750, avg loss: 11.064013, ppl: 63832.167969
epoch: 0, batch: 3, sum loss: 256837.875000, avg loss: 11.058206, ppl: 63462.574219
epoch: 0, batch: 4, sum loss: 256461.000000, avg loss: 11.053401, ppl: 63158.390625
epoch: 0, batch: 5, sum loss: 257064.562500, avg loss: 11.019099, ppl: 61028.683594
epoch: 0, batch: 6, sum loss: 256180.125000, avg loss: 11.008556, ppl: 60388.644531
epoch: 0, batch: 7, sum loss: 256619.671875, avg loss: 11.007106, ppl: 60301.113281
epoch: 0, batch: 8, sum loss: 255716.734375, avg loss: 10.966025, ppl: 57874.105469
epoch: 0, batch: 9, sum loss: 245157.500000, avg loss: 10.966562, ppl: 57905.187500
step_idx: 0, epoch: 0, batch: 0, avg loss: 11.059394, normalized loss: 9.682427, ppl: 63538.027344
step_idx: 1, epoch: 0, batch: 1, avg loss: 11.053112, normalized loss: 9.676146, ppl: 63140.144531
step_idx: 2, epoch: 0, batch: 2, avg loss: 11.054576, normalized loss: 9.677609, ppl: 63232.640625
step_idx: 3, epoch: 0, batch: 3, avg loss: 11.046638, normalized loss: 9.669671, ppl: 62732.664062
step_idx: 4, epoch: 0, batch: 4, avg loss: 11.030095, normalized loss: 9.653129, ppl: 61703.449219
step_idx: 5, epoch: 0, batch: 5, avg loss: 11.047491, normalized loss: 9.670525, ppl: 62786.230469
step_idx: 6, epoch: 0, batch: 6, avg loss: 11.044509, normalized loss: 9.667542, ppl: 62599.273438
step_idx: 7, epoch: 0, batch: 7, avg loss: 11.011090, normalized loss: 9.634124, ppl: 60541.859375
step_idx: 8, epoch: 0, batch: 8, avg loss: 10.985243, normalized loss: 9.608276, ppl: 58997.058594
step_idx: 9, epoch: 0, batch: 9, avg loss: 10.993434, normalized loss: 9.616467, ppl: 59482.292969
```
### Model Prediction
......@@ -143,19 +144,19 @@ python -u infer.py \
--src_vocab_fpath data/vocab.bpe.32000 \
--trg_vocab_fpath data/vocab.bpe.32000 \
--special_token '<s>' '<e>' '<unk>' \
--test_file_pattern data/newstest2013.tok.bpe.32000.en-de \
--test_file_pattern data/newstest2016.tok.bpe.32000.en-de \
--use_wordpiece False \
--token_delimiter ' ' \
--batch_size 4 \
model_path trained_models/pass_20.infer.model \
beam_size 5 \
max_out_len 256
--batch_size 32 \
model_path trained_models/iter_199999.infer.model \
beam_size 4 \
max_out_len 255
```
As with training, prediction requires data- and reader-related arguments, and `python infer.py --help` shows their descriptions (some have slightly different meanings than at training time). Model hyperparameters can likewise be set in the prediction command, but they should match the settings used for training. Prediction also takes some extra arguments compared with training: `model_path` must be set to the directory containing the model, and `beam_size` and `max_out_len` can be set to specify the beam width and maximum depth (translation length) of the beam search; the comments on these can be consulted, and the settings changed, in `InferTaskConfig` in `config.py`.
Running the prediction command above prints the translations to standard output, one highest-scoring translation per input line. For the BPE-encoded EN-DE data, the predicted translations are also in BPE form and must be restored to the original data (i.e., the tokenized data) for correct evaluation; the following command restores the translations in `predict.txt` to `predict.tok.txt` (no further tokenization is needed):
```sh
sed 's/@@ //g' predict.txt > predict.tok.txt
sed -r 's/(@@ )|(@@ ?$)//g' predict.txt > predict.tok.txt
```
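For reference, a hedged Python equivalent of the `sed -r` restoration above (same regex, file names as in the example):
```python
import re

# Remove BPE continuation markers: "@@ " within a line and a trailing "@@".
with open("predict.txt") as fin, open("predict.tok.txt", "w") as fout:
    for line in fin:
        fout.write(re.sub(r"(@@ )|(@@ ?$)", "", line))
```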
For the EN-FR wordpiece data, prediction is similar to EN-DE prediction: change the vocabulary and data files in the command to the paths of the corresponding EN-FR files, and note that `token_delimiter` must be set to `--token_delimiter '\x01'` and `use_wordpiece` to `--use_wordpiece True`, which restores the predicted wordpiece data to its original form on output. To evaluate against tokenized data, the translations still need to be tokenized; [Moses](https://github.com/moses-smt/mosesdecoder) provides a collection of machine-translation scripts. After cloning the mosesdecoder repository with `git clone https://github.com/moses-smt/mosesdecoder.git`, its `tokenizer.perl` script can tokenize the translations in `predict.txt` and write them to `predict.tok.txt`, as follows:
......@@ -163,15 +164,21 @@ sed 's/@@ //g' predict.txt > predict.tok.txt
perl mosesdecoder/scripts/tokenizer/tokenizer.perl -l fr < predict.txt > predict.tok.txt
```
The translations can then be evaluated against the reference translations with the BLEU metric. The BLEU-scoring script is also included in Moses; taking the EN-DE `newstest2013.tok.de` data as an example, run the following command:
The translations can then be evaluated against the reference translations with the BLEU metric. The BLEU-scoring script is also included in Moses; taking the EN-DE `newstest2016.tok.de` data as an example, run the following command:
```sh
perl mosesdecoder/scripts/generic/multi-bleu.perl data/newstest2013.tok.de < predict.tok.txt
perl mosesdecoder/scripts/generic/multi-bleu.perl data/newstest2016.tok.de < predict.tok.txt
```
A result similar to the following can be seen.
A result similar to the following can be seen (predictions from a model trained for 200K iterations on a single machine with two GPUs):
```
BLEU = 25.08, 58.3/31.5/19.6/12.6 (BP=0.966, ratio=0.967, hyp_len=61321, ref_len=63412)
BLEU = 33.08, 64.2/39.2/26.4/18.5 (BP=0.994, ratio=0.994, hyp_len=61971, ref_len=62362)
```
Currently, without model averaging, training with the default configuration on a single machine with eight GPUs (the base-model configuration from the paper) gives a test BLEU of 25. on `newstest2013` and 26. on `newstest2014` for EN-DE, and a test BLEU of 36. on `newstest2014` for EN-FR.
Currently, without model averaging, the EN-DE base model trained for 100K iterations on eight GPUs gives the following test BLEU scores:
| Test set | newstest2013 | newstest2014 | newstest2015 | newstest2016 |
|-|-|-|-|-|
| BLEU | 25.27 | 26.05 | 28.75 | 33.27 |
The EN-FR base model trained for 100K iterations on eight GPUs gives a test BLEU of 36. on `newstest2014`.
### Distributed Training
......
......@@ -9,12 +9,12 @@ class TrainTaskConfig(object):
# the hyper parameters for Adam optimizer.
# This static learning_rate will be multiplied to the LearningRateScheduler
# derived learning rate the to get the final learning rate.
learning_rate = 1
learning_rate = 2.0
beta1 = 0.9
beta2 = 0.98
beta2 = 0.997
eps = 1e-9
# the parameters for learning rate scheduling.
warmup_steps = 4000
warmup_steps = 8000
# the weight used to mix up the ground-truth distribution and the fixed
# uniform distribution in label smoothing when training.
# Set this as zero if label smoothing is not wanted.
......@@ -30,6 +30,8 @@ class TrainTaskConfig(object):
# It should be provided if use checkpoints, since the checkpoint doesn't
# include the training step counter currently.
start_step = 0
# the frequency to save trained models.
save_freq = 10000
class InferTaskConfig(object):
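These scheduler settings feed the `noam_decay` schedule used by the training code later in this commit; a rough, hedged sketch of the resulting rate, assuming the base-model `d_model` of 512:
```python
def noam_lr(step, d_model=512, warmup_steps=8000, learning_rate=2.0):
    # learning_rate * d_model**-0.5 * min(step**-0.5, step * warmup_steps**-1.5):
    # linear warmup for warmup_steps steps, then inverse-square-root decay.
    step = max(step, 1)
    return learning_rate * d_model ** -0.5 * min(
        step ** -0.5, step * warmup_steps ** -1.5)
```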
......@@ -63,7 +65,6 @@ class ModelHyperParams(object):
# index for <unk> token
unk_idx = 2
# max length of sequences deciding the size of position encoding table.
# Start from 1 and count start and end tokens in.
max_length = 256
# the dimension for word embeddings, which is also the last dimension of
# the input and output of multi-head attention, position-wise feed-forward
......@@ -79,8 +80,14 @@ class ModelHyperParams(object):
n_head = 8
# number of sub-layers to be stacked in the encoder and decoder.
n_layer = 6
# dropout rate used by all dropout layers.
dropout = 0.1
# dropout rates of different modules.
prepostprocess_dropout = 0.1
attention_dropout = 0.1
relu_dropout = 0.1
# to process before each sub-layer
preprocess_cmd = "n" # layer normalization
# to process after each sub-layer
postprocess_cmd = "da" # dropout + residual connection
# random seed used in dropout for CE.
dropout_seed = None
# the flag indicating whether to share embedding and softmax weights.
......
......@@ -156,7 +156,9 @@ def fast_infer(test_data, trg_idx2word, use_wordpiece):
ModelHyperParams.max_length + 1, ModelHyperParams.n_layer,
ModelHyperParams.n_head, ModelHyperParams.d_key,
ModelHyperParams.d_value, ModelHyperParams.d_model,
ModelHyperParams.d_inner_hid, ModelHyperParams.dropout,
ModelHyperParams.d_inner_hid, ModelHyperParams.prepostprocess_dropout,
ModelHyperParams.attention_dropout, ModelHyperParams.relu_dropout,
ModelHyperParams.preprocess_cmd, ModelHyperParams.postprocess_cmd,
ModelHyperParams.weight_sharing, InferTaskConfig.beam_size,
InferTaskConfig.max_out_len, ModelHyperParams.eos_idx)
......@@ -169,7 +171,7 @@ def fast_infer(test_data, trg_idx2word, use_wordpiece):
])
# This is used here to set dropout to the test mode.
infer_program = fluid.default_main_program().inference_optimize()
infer_program = fluid.default_main_program().clone(for_test=True)
for batch_id, data in enumerate(test_data.batch_generator()):
data_input = prepare_batch_input(
......
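The replacement API above deserves a note: `clone(for_test=True)` keeps the graph but switches ops such as dropout to inference behavior. A minimal standalone sketch (toy layers, not this model):
```python
import paddle.fluid as fluid

x = fluid.layers.data(name="x", shape=[8], dtype="float32")
hidden = fluid.layers.fc(input=x, size=4)
out = fluid.layers.dropout(hidden, dropout_prob=0.1)
# Derive the test program after the graph is built; dropout now acts as
# identity at inference time instead of masking activations.
infer_program = fluid.default_main_program().clone(for_test=True)
```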
import os
import time
import argparse
import ast
import numpy as np
import multiprocessing
import os
import six
import time
import paddle
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
from train import split_data, read_multiple, prepare_batch_input
from model import transformer, position_encoding_init
from optim import LearningRateScheduler
from config import *
import reader
from config import *
from train import pad_batch_data, prepare_data_generator, \
prepare_feed_dict_list, py_reader_provider_wrapper
from model import transformer, position_encoding_init
def parse_args():
parser = argparse.ArgumentParser(
"Profile the training process for Transformer.")
parser = argparse.ArgumentParser("Training for Transformer.")
parser.add_argument(
"--src_vocab_fpath",
type=str,
......@@ -43,38 +42,70 @@ def parse_args():
parser.add_argument(
"--batch_size",
type=int,
default=2048,
default=4096,
help="The number of sequences contained in a mini-batch, or the maximum "
"number of tokens (include paddings) contained in a mini-batch. Note "
"that this represents the number on single device and the actual batch "
"size for multi-devices will multiply the device number.")
parser.add_argument(
"--num_iters",
type=int,
default=10,
help="The maximum number of iterations profiling over.")
parser.add_argument(
"--pool_size",
type=int,
default=10000,
default=200000,
help="The buffer size to pool data.")
parser.add_argument(
"--sort_type",
default="pool",
choices=("global", "pool", "none"),
help="The grain to sort by length: global for all instances; pool for "
"instances in pool; none for no sort.")
parser.add_argument(
"--shuffle",
type=ast.literal_eval,
default=True,
help="The flag indicating whether to shuffle instances in each pass.")
parser.add_argument(
"--shuffle_batch",
type=ast.literal_eval,
default=True,
help="The flag indicating whether to shuffle the data batches.")
parser.add_argument(
"--special_token",
type=str,
default=["<s>", "<e>", "<unk>"],
nargs=3,
help="The <bos>, <eos> and <unk> tokens in the dictionary.")
parser.add_argument(
"--token_delimiter",
type=lambda x: str(x.encode().decode("unicode-escape")),
default=" ",
help="The delimiter used to split tokens in source or target sentences. "
"For EN-DE BPE data we provided, use spaces as token delimiter. "
"For EN-FR wordpiece data we provided, use '\x01' as token delimiter.")
parser.add_argument(
"--use_mem_opt",
type=ast.literal_eval,
default=True,
help="The flag indicating whether to use memory optimization.")
parser.add_argument(
"--use_py_reader",
type=ast.literal_eval,
default=True,
help="The flag indicating whether to use py_reader.")
parser.add_argument(
"--iter_num",
type=int,
default=20,
help="The iteration number to run in profiling.")
parser.add_argument(
"--use_parallel_exe",
type=bool,
default=False,
help="The flag indicating whether to use ParallelExecutor.")
parser.add_argument(
'opts',
help='See config.py for all options',
default=None,
nargs=argparse.REMAINDER)
parser.add_argument(
'--device',
type=str,
default='GPU',
choices=['CPU', 'GPU'],
help="The device type.")
args = parser.parse_args()
# Append args related to dict
......@@ -91,153 +122,147 @@ def parse_args():
return args
def train_loop(exe, train_progm, init, num_iters, train_data, dev_count,
sum_cost, avg_cost, lr_scheduler, token_num, predict):
data_input_names = encoder_data_input_fields + decoder_data_input_fields[:
-1] + label_data_input_fields
start_time = time.time()
exec_time = 0.0
for batch_id, data in enumerate(train_data()):
if batch_id >= num_iters:
break
feed_list = []
total_num_token = 0
for place_id, data_buffer in enumerate(
split_data(
data, num_part=dev_count)):
data_input_dict, num_token = prepare_batch_input(
data_buffer, data_input_names, ModelHyperParams.eos_idx,
ModelHyperParams.eos_idx, ModelHyperParams.n_head,
ModelHyperParams.d_model)
total_num_token += num_token
feed_kv_pairs = data_input_dict.items()
lr_rate = lr_scheduler.update_learning_rate()
feed_kv_pairs += {lr_scheduler.learning_rate.name: lr_rate}.items()
feed_list.append(dict(feed_kv_pairs))
if not init:
for pos_enc_param_name in pos_enc_param_names:
pos_enc = position_encoding_init(
ModelHyperParams.max_length + 1,
ModelHyperParams.d_model)
feed_list[place_id][pos_enc_param_name] = pos_enc
for feed_dict in feed_list:
feed_dict[sum_cost.name + "@GRAD"] = 1. / total_num_token
exe_start_time = time.time()
if dev_count > 1:
# prallel executor
outs = exe.run(fetch_list=[sum_cost.name, token_num.name],
feed=feed_list)
else:
# executor
outs = exe.run(fetch_list=[sum_cost, token_num], feed=feed_list[0])
exec_time += time.time() - exe_start_time
sum_cost_val, token_num_val = np.array(outs[0]), np.array(outs[1])
total_sum_cost = sum_cost_val.sum() # sum the cost from multi-devices
total_token_num = token_num_val.sum()
total_avg_cost = total_sum_cost / total_token_num
print("batch: %d, sum loss: %f, avg loss: %f, ppl: %f" %
(batch_id, total_sum_cost, total_avg_cost,
np.exp([min(total_avg_cost, 100)])))
init = True
return time.time() - start_time, exec_time
def profile(args):
print args
if args.device == 'CPU':
TrainTaskConfig.use_gpu = False
if not TrainTaskConfig.use_gpu:
place = fluid.CPUPlace()
dev_count = multiprocessing.cpu_count()
else:
def main(args):
train_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(train_prog, startup_prog):
with fluid.unique_name.guard():
sum_cost, avg_cost, predict, token_num, pyreader = transformer(
ModelHyperParams.src_vocab_size,
ModelHyperParams.trg_vocab_size,
ModelHyperParams.max_length + 1,
ModelHyperParams.n_layer,
ModelHyperParams.n_head,
ModelHyperParams.d_key,
ModelHyperParams.d_value,
ModelHyperParams.d_model,
ModelHyperParams.d_inner_hid,
ModelHyperParams.prepostprocess_dropout,
ModelHyperParams.attention_dropout,
ModelHyperParams.relu_dropout,
ModelHyperParams.preprocess_cmd,
ModelHyperParams.postprocess_cmd,
ModelHyperParams.weight_sharing,
TrainTaskConfig.label_smooth_eps,
use_py_reader=args.use_py_reader,
is_test=False)
lr_decay = fluid.layers.learning_rate_scheduler.noam_decay(
ModelHyperParams.d_model, TrainTaskConfig.warmup_steps)
optimizer = fluid.optimizer.Adam(
learning_rate=lr_decay * TrainTaskConfig.learning_rate,
beta1=TrainTaskConfig.beta1,
beta2=TrainTaskConfig.beta2,
epsilon=TrainTaskConfig.eps)
optimizer.minimize(avg_cost)
if args.use_mem_opt:
fluid.memory_optimize(train_prog)
if TrainTaskConfig.use_gpu:
place = fluid.CUDAPlace(0)
dev_count = fluid.core.get_cuda_device_count()
else:
place = fluid.CPUPlace()
dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
exe = fluid.Executor(place)
sum_cost, avg_cost, predict, token_num = transformer(
ModelHyperParams.src_vocab_size, ModelHyperParams.trg_vocab_size,
ModelHyperParams.max_length + 1, ModelHyperParams.n_layer,
ModelHyperParams.n_head, ModelHyperParams.d_key,
ModelHyperParams.d_value, ModelHyperParams.d_model,
ModelHyperParams.d_inner_hid, ModelHyperParams.dropout,
ModelHyperParams.weight_sharing, TrainTaskConfig.label_smooth_eps)
lr_scheduler = LearningRateScheduler(ModelHyperParams.d_model,
TrainTaskConfig.warmup_steps,
TrainTaskConfig.learning_rate)
optimizer = fluid.optimizer.Adam(
learning_rate=lr_scheduler.learning_rate,
beta1=TrainTaskConfig.beta1,
beta2=TrainTaskConfig.beta2,
epsilon=TrainTaskConfig.eps)
optimizer.minimize(sum_cost)
# Initialize the parameters.
if TrainTaskConfig.ckpt_path:
fluid.io.load_persistables(exe, TrainTaskConfig.ckpt_path)
lr_scheduler.current_steps = TrainTaskConfig.start_step
else:
exe.run(fluid.framework.default_startup_program())
# Disable all sorts for they will be done in the 1st batch.
train_data = reader.DataReader(
src_vocab_fpath=args.src_vocab_fpath,
trg_vocab_fpath=args.trg_vocab_fpath,
fpattern=args.train_file_pattern,
use_token_batch=args.use_token_batch,
batch_size=args.batch_size * (1 if args.use_token_batch else dev_count),
pool_size=args.pool_size,
sort_type='none',
shuffle=False,
shuffle_batch=False,
start_mark=args.special_token[0],
end_mark=args.special_token[1],
unk_mark=args.special_token[2],
# count start and end tokens out
max_length=ModelHyperParams.max_length - 2,
clip_last_batch=False)
train_data = read_multiple(
reader=train_data.batch_generator,
count=dev_count if args.use_token_batch else 1)
if dev_count > 1:
build_strategy = fluid.BuildStrategy()
build_strategy.gradient_scale_strategy = fluid.BuildStrategy.GradientScaleStrategy.Customized
train_exe = fluid.ParallelExecutor(
use_cuda=TrainTaskConfig.use_gpu,
loss_name=sum_cost.name,
main_program=fluid.default_main_program(),
build_strategy=build_strategy)
print("Warming up ...")
train_loop(exe if dev_count == 1 else train_exe,
fluid.default_main_program(), False, 3, train_data, dev_count,
sum_cost, avg_cost, lr_scheduler, token_num, predict)
print("\nProfiling ...")
if dev_count == 1:
with profiler.profiler('All', 'total', '/tmp/profile_file'):
total_time, exec_time = train_loop(
exe,
fluid.default_main_program(), True, args.num_iters, train_data,
dev_count, sum_cost, avg_cost, lr_scheduler, token_num, predict)
exe.run(startup_prog)
exec_strategy = fluid.ExecutionStrategy()
# For faster executor
exec_strategy.use_experimental_executor = True
exec_strategy.num_iteration_per_drop_scope = 5
build_strategy = fluid.BuildStrategy()
# Since the token number differs among devices, customize gradient scale to
# use token average cost among multi-devices. and the gradient scale is
# `1 / token_number` for average cost.
build_strategy.gradient_scale_strategy = fluid.BuildStrategy.GradientScaleStrategy.Customized
train_exe = fluid.ParallelExecutor(
use_cuda=TrainTaskConfig.use_gpu,
loss_name=avg_cost.name,
main_program=train_prog,
build_strategy=build_strategy,
exec_strategy=exec_strategy)
# the best cross-entropy value with label smoothing
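# With smoothing eps spread over a target vocabulary of size V, the
# cross-entropy has a floor of -((1 - eps) * log(1 - eps) + eps * log(eps / (V - 1)));
# subtracting this floor gives the "normalized loss" printed in the logs.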
loss_normalizer = -((1. - TrainTaskConfig.label_smooth_eps) * np.log(
(1. - TrainTaskConfig.label_smooth_eps
)) + TrainTaskConfig.label_smooth_eps *
np.log(TrainTaskConfig.label_smooth_eps / (
ModelHyperParams.trg_vocab_size - 1) + 1e-20))
train_data = prepare_data_generator(
args, is_test=False, count=dev_count, pyreader=pyreader)
if args.use_py_reader:
pyreader.start()
data_generator = None
else:
total_time, exec_time = train_loop(
train_exe,
fluid.default_main_program(), True, args.num_iters, train_data,
dev_count, sum_cost, avg_cost, lr_scheduler, token_num, predict)
print("Elapsed time: total %f s, in executor %f s" %
(total_time, exec_time))
data_generator = train_data()
def run(iter_num):
reader_time = []
run_time = []
for step_idx in six.moves.xrange(iter_num):
try:
start_time = time.time()
feed_dict_list = prepare_feed_dict_list(data_generator,
init_flag, dev_count)
end_time = time.time()
reader_time.append(end_time - start_time)
start_time = time.time()
if args.use_parallel_exe:
outs = train_exe.run(
fetch_list=[sum_cost.name, token_num.name],
feed=feed_dict_list)
else:
outs = exe.run(program=train_prog,
fetch_list=[sum_cost.name, token_num.name],
feed=feed_dict_list[0]
if feed_dict_list is not None else None)
end_time = time.time()
run_time.append(end_time - start_time)
sum_cost_val, token_num_val = np.array(outs[0]), np.array(outs[
1])
# sum the cost from multi-devices
total_sum_cost = sum_cost_val.sum()
total_token_num = token_num_val.sum()
total_avg_cost = total_sum_cost / total_token_num
print("step_idx: %d, avg loss: %f, "
"normalized loss: %f, ppl: %f" %
(step_idx, total_avg_cost,
total_avg_cost - loss_normalizer,
np.exp([min(total_avg_cost, 100)])))
except (StopIteration, fluid.core.EOFException):
# The current pass is over.
if args.use_py_reader:
pyreader.reset()
pyreader.start()
break
return reader_time, run_time
# start-up
init_flag = True
run(1)
init_flag = False
# profiling
start = time.time()
# currently only support profiling on one device
with profiler.profiler('All', 'total', '/tmp/profile_file'):
reader_time, run_time = run(args.iter_num)
end = time.time()
total_time = end - start
print("Total time: {0}, reader time: {1} s, run time: {2} s".format(
total_time, np.sum(reader_time), np.sum(run_time)))
if __name__ == "__main__":
args = parse_args()
profile(args)
main(args)
import glob
import six
import os
import tarfile
......@@ -12,15 +13,16 @@ class SortType(object):
class Converter(object):
def __init__(self, vocab, beg, end, unk, delimiter):
def __init__(self, vocab, beg, end, unk, delimiter, add_beg):
self._vocab = vocab
self._beg = beg
self._end = end
self._unk = unk
self._delimiter = delimiter
self._add_beg = add_beg
def __call__(self, sentence):
return [self._beg] + [
return ([self._beg] if self._add_beg else []) + [
self._vocab.get(w, self._unk)
for w in sentence.split(self._delimiter)
] + [self._end]
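For illustration, a hedged sketch of the new flag's effect (vocabulary and indices made up; the `<s>`/`<e>`/`<unk>` ids mirror the defaults):
```python
vocab = {"hello": 3, "world": 4}
src = Converter(vocab, beg=0, end=1, unk=2, delimiter=" ", add_beg=False)
trg = Converter(vocab, beg=0, end=1, unk=2, delimiter=" ", add_beg=True)
print(src("hello world"))  # [3, 4, 1]    -- source side: no leading <s>
print(trg("hello world"))  # [0, 3, 4, 1] -- target side keeps <s> for decoding
```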
......@@ -215,7 +217,8 @@ class DataReader(object):
beg=self._src_vocab[start_mark],
end=self._src_vocab[end_mark],
unk=self._src_vocab[unk_mark],
delimiter=self._token_delimiter)
delimiter=self._token_delimiter,
add_beg=False)
]
if not self._only_src:
converters.append(
......@@ -224,7 +227,8 @@ class DataReader(object):
beg=self._trg_vocab[start_mark],
end=self._trg_vocab[end_mark],
unk=self._trg_vocab[unk_mark],
delimiter=self._token_delimiter))
delimiter=self._token_delimiter,
add_beg=True))
converters = ComposedConverter(converters)
......@@ -259,8 +263,10 @@ class DataReader(object):
if not os.path.isfile(fpath):
raise IOError("Invalid file: %s" % fpath)
with open(fpath, "r") as f:
with open(fpath, "rb") as f:
for line in f:
if six.PY3:
line = line.decode()
fields = line.strip("\n").split(self._field_delimiter)
if (not self._only_src and len(fields) == 2) or (
self._only_src and len(fields) == 1):
......@@ -269,8 +275,10 @@ class DataReader(object):
@staticmethod
def load_dict(dict_path, reverse=False):
word_dict = {}
with open(dict_path, "r") as fdict:
with open(dict_path, "rb") as fdict:
for idx, line in enumerate(fdict):
if six.PY3:
line = line.decode()
if reverse:
word_dict[idx] = line.strip("\n")
else:
......@@ -280,8 +288,7 @@ class DataReader(object):
def batch_generator(self):
# global sort or global shuffle
if self._sort_type == SortType.GLOBAL:
infos = sorted(
self._sample_infos, key=lambda x: x.max_len, reverse=True)
infos = sorted(self._sample_infos, key=lambda x: x.max_len)
else:
if self._shuffle:
infos = self._sample_infos
......
......@@ -20,3 +20,4 @@ data/pascalvoc/trainval.txt
log*
*.log
ssd_mobilenet_v1_pascalvoc*
......@@ -38,7 +38,8 @@ train_parameters = {
"batch_size": 64,
"lr": 0.001,
"lr_epochs": [40, 60, 80, 100],
"lr_decay": [1, 0.5, 0.25, 0.1, 0.01]
"lr_decay": [1, 0.5, 0.25, 0.1, 0.01],
"ap_version": '11point',
},
"coco2014": {
"train_images": 82783,
......@@ -47,7 +48,8 @@ train_parameters = {
"batch_size": 64,
"lr": 0.001,
"lr_epochs": [12, 19],
"lr_decay": [1, 0.5, 0.25]
"lr_decay": [1, 0.5, 0.25],
"ap_version": 'integral', # should use eval_coco_map.py to test model
},
"coco2017": {
"train_images": 118287,
......@@ -56,7 +58,8 @@ train_parameters = {
"batch_size": 64,
"lr": 0.001,
"lr_epochs": [12, 19],
"lr_decay": [1, 0.5, 0.25]
"lr_decay": [1, 0.5, 0.25],
"ap_version": 'integral', # should use eval_coco_map.py to test model
}
}
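The new `ap_version` entries pair each dataset with the AP metric its evaluation expects; a hedged usage sketch (the first dataset key is hypothetical here, as it is collapsed by the diff view):
```python
# '11point' interpolated AP for PASCAL VOC; 'integral' AP for the COCO
# configs, which should be evaluated with eval_coco_map.py instead.
train_params = train_parameters["pascalvoc"]
print(train_params["ap_version"])  # '11point'
```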
......@@ -77,6 +80,7 @@ def optimizer_setting(train_params):
def build_program(main_prog, startup_prog, train_params, is_train):
image_shape = train_params['image_shape']
class_num = train_params['class_num']
ap_version = train_params['ap_version']
with fluid.program_guard(main_prog, startup_prog):
py_reader = fluid.layers.py_reader(
capacity=64,
......@@ -97,16 +101,15 @@ def build_program(main_prog, startup_prog, train_params, is_train):
nmsed_out = fluid.layers.detection_output(
locs, confs, box, box_var, nms_threshold=0.45)
with fluid.program_guard(main_prog):
loss = fluid.evaluator.DetectionMAP(
nmsed_out,
gt_label,
gt_box,
difficult,
class_num,
overlap_threshold=0.5,
evaluate_difficult=False,
ap_version=args.ap_version)
loss = fluid.evaluator.DetectionMAP(
nmsed_out,
gt_label,
gt_box,
difficult,
class_num,
overlap_threshold=0.5,
evaluate_difficult=False,
ap_version=ap_version)
return py_reader, loss
......@@ -126,7 +129,7 @@ def train(args,
devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
devices_num = len(devices.split(","))
batch_size = train_params['batch_size']
epoc_num = train_params['epoch_num']
epoc_num = train_params['epoc_num']
batch_size_per_device = batch_size // devices_num
iters_per_epoc = train_params["train_images"] // batch_size
num_workers = 8
......@@ -230,7 +233,7 @@ def train(args,
loss_v = np.mean(np.array(loss_v))
every_epoc_loss.append(loss_v)
if batch_id % 20 == 0:
print("Epoc {0}, batch {1}, loss {2}, time {3}".format(
print("Epoc {:d}, batch {:d}, loss {:.6f}, time {:.5f}".format(
epoc_id, batch_id, loss_v, start_time - prev_start_time))
end_time = time.time()
total_time += end_time - start_time
......
......@@ -2,6 +2,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
import six
decoder_size = 128
word_vector_dim = 128
......@@ -22,7 +23,7 @@ def conv_bn_pool(input,
pool=True,
use_cudnn=True):
tmp = input
for i in xrange(group):
for i in six.moves.xrange(group):
filter_size = 3
conv_std = (2.0 / (filter_size**2 * tmp.shape[1]))**0.5
conv_param = fluid.ParamAttr(
......
import paddle.v2 as paddle
import paddle.fluid as fluid
from utility import add_arguments, print_arguments, to_lodtensor, get_ctc_feeder_data, get_attention_feeder_data
from attention_model import attention_eval
......
from __future__ import print_function
import paddle.v2 as paddle
import paddle.fluid as fluid
from utility import add_arguments, print_arguments, to_lodtensor, get_ctc_feeder_data, get_attention_feeder_for_infer
import paddle.fluid.profiler as profiler
......
import numpy as np
import paddle.v2 as paddle
import paddle.fluid as fluid
# reproducible
np.random.seed(1)
......
......@@ -111,7 +111,6 @@ According to the configuration of evaluation, the output log is like:
Inference is used to get prediction score or video features based on trained models.
```
python infer.py \
--batch_size=128 \
--class_dim=101 \
--image_shape=3,224,224 \
--with_mem_opt=True \
......
......@@ -9,27 +9,33 @@ for line in f.readlines():
dd[name.lower()] = int(label) - 1
f.close()
# generate pkl
path = 'train/'
savepath = 'train_pkl/'
if not os.path.exists(savepath):
os.makedirs(savepath)
fw = open('train.list', 'w')
for folder in os.listdir(path):
vidid = folder.split('_', 1)[1]
this_label = dd[folder.split('_')[1].lower()]
this_feat = []
for img in sorted(os.listdir(path + folder)):
fout = open(path + folder + '/' + img, 'rb')
this_feat.append(fout.read())
fout.close()
res = [vidid, this_label, this_feat]
outp = open(savepath + vidid + '.pkl', 'wb')
cPickle.dump(res, outp, protocol=cPickle.HIGHEST_PROTOCOL)
outp.close()
fw.write('data/train_pkl/%s.pkl\n' % vidid)
fw.close()
def generate_pkl(mode):
# generate pkl
path = '%s/' % mode
savepath = '%s_pkl/' % mode
if not os.path.exists(savepath):
os.makedirs(savepath)
fw = open('%s.list' % mode, 'w')
for folder in os.listdir(path):
vidid = folder.split('_', 1)[1]
this_label = dd[folder.split('_')[1].lower()]
this_feat = []
for img in sorted(os.listdir(path + folder)):
fout = open(path + folder + '/' + img, 'rb')
this_feat.append(fout.read())
fout.close()
res = [vidid, this_label, this_feat]
outp = open(savepath + vidid + '.pkl', 'wb')
cPickle.dump(res, outp, protocol=cPickle.HIGHEST_PROTOCOL)
outp.close()
fw.write('data/%s%s.pkl\n' % (savepath, vidid))
fw.close()
generate_pkl('train')
generate_pkl('test')
......@@ -2,7 +2,7 @@ import os
import numpy as np
import time
import sys
import paddle.v2 as paddle
import paddle
import paddle.fluid as fluid
from resnet import TSN_ResNet
import reader
......
......@@ -2,7 +2,7 @@ import os
import numpy as np
import time
import sys
import paddle.v2 as paddle
import paddle
import paddle.fluid as fluid
from resnet import TSN_ResNet
import reader
......
......@@ -5,7 +5,7 @@ import functools
import cPickle
from cStringIO import StringIO
import numpy as np
import paddle.v2 as paddle
import paddle
from PIL import Image, ImageEnhance
random.seed(0)
......@@ -16,8 +16,8 @@ THREAD = 8
BUF_SIZE = 1024
TRAIN_LIST = 'data/train.list'
TEST_LIST = 'data/val.list'
INFER_LIST = 'data/val.list'
TEST_LIST = 'data/test.list'
INFER_LIST = 'data/test.list'
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
......
......@@ -2,6 +2,7 @@ import os
import numpy as np
import time
import sys
import paddle
import paddle.fluid as fluid
from resnet import TSN_ResNet
import reader
......
......@@ -12,23 +12,23 @@ The word embedding expresses words with a real vector. Each dimension of the vec
In the example of word vectors, we show how to use Hierarchical-Sigmoid and Noise Contrastive Estimation (NCE) to accelerate word-vector learning.
- 1.1 [Hsigmoid Accelerated Word Vector Training](https://github.com/PaddlePaddle/models/tree/develop/v2/hsigmoid)
- 1.2 [Noise Contrastive Estimation Accelerated Word Vector Training](https://github.com/PaddlePaddle/models/tree/develop/v2/nce_cost)
- 1.1 [Hsigmoid Accelerated Word Vector Training](https://github.com/PaddlePaddle/models/tree/develop/legacy/hsigmoid)
- 1.2 [Noise Contrastive Estimation Accelerated Word Vector Training](https://github.com/PaddlePaddle/models/tree/develop/legacy/nce_cost)
## 2. RNN language model
The language model is important in the field of natural language processing. In addition to producing word vectors (a by-product of language model training), it can also help us generate text. Given a number of words, the language model can predict the next most likely word. In the example of using the language model to generate text, we focus on the recurrent neural network language model. Following the instructions in the document, it can quickly be adapted to your own training corpus to build interesting applications such as automatic poetry writing and automatic prose writing.
- 2.1 [Generate text using the RNN language model](https://github.com/PaddlePaddle/models/tree/develop/v2/generate_sequence_by_rnn_lm)
- 2.1 [Generate text using the RNN language model](https://github.com/PaddlePaddle/models/tree/develop/legacy/generate_sequence_by_rnn_lm)
## 3. Click-Through Rate prediction
The click-through rate model predicts the probability that a user will click on an ad, and is widely used in advertising technology. In the early stages of click-through rate prediction, Logistic Regression delivered good learning performance on large-scale sparse features. In recent years, DNN models have gradually taken over the task thanks to their strong learning ability.
In the example of click-through rate estimation, we first present Google's Wide & Deep model, which combines the learning power of DNNs with the suitability of logistic regression for large-scale sparse features. Then we provide the deep factorization machine for click-through rate prediction. The deep factorization machine combines the factorization machine and deep neural networks to model both low-order and high-order interactions of input features.
- 3.1 [Click-Through Rate Model](https://github.com/PaddlePaddle/models/tree/develop/v2/ctr)
- 3.2 [Deep Factorization Machine for Click-Through Rate prediction](https://github.com/PaddlePaddle/models/tree/develop/v2/deep_fm)
- 3.1 [Click-Through Rate Model](https://github.com/PaddlePaddle/models/tree/develop/legacy/ctr)
- 3.2 [Deep Factorization Machine for Click-Through Rate prediction](https://github.com/PaddlePaddle/models/tree/develop/legacy/deep_fm)
## 4. Text classification
......@@ -36,7 +36,7 @@ Text classification is one of the most basic tasks in natural language processin
For text classification, we provide a non-sequential text classification model based on DNN and CNN. (For LSTM-based models, please refer to PaddleBook [Sentiment Analysis](http://www.paddlepaddle.org/docs/develop/book/06.understand_sentiment/index.html)).
- 4.1 [Sentiment analysis based on DNN / CNN](https://github.com/PaddlePaddle/models/tree/develop/v2/text_classification)
- 4.1 [Sentiment analysis based on DNN / CNN](https://github.com/PaddlePaddle/models/tree/develop/legacy/text_classification)
## 5. Learning to rank
......@@ -45,14 +45,14 @@ The deep neural network can be used to model the scoring function to form va
The algorithms for learning to rank are usually categorized into three groups by their input representation and loss function: pointwise, pairwise, and listwise approaches. Here we demonstrate the RankLoss loss function (a pairwise approach) and the LambdaRank loss function (a listwise approach). (For pointwise approaches, please refer to [Recommended System](http://www.paddlepaddle.org/docs/develop/book/05.recommender_system/index.html)).
- 5.1 [Learning to rank based on Pairwise and Listwise approaches](https://github.com/PaddlePaddle/models/tree/develop/v2/ltr)
- 5.1 [Learning to rank based on Pairwise and Listwise approaches](https://github.com/PaddlePaddle/models/tree/develop/legacy/ltr)
## 6. Semantic model
The deep structured semantic model uses a DNN to learn low-dimensional vector representations in a continuous semantic space, and finally models the semantic similarity between two sentences.
In this example, we demonstrate how to use PaddlePaddle to implement a generic deep structured semantic model to model the semantic similarity between two strings. The model supports different network structures such as CNN (Convolutional Network), FC (Fully Connected Network), and RNN (Recurrent Neural Network), as well as different loss functions for classification, regression, and ranking.
- 6.1 [Deep structured semantic model](https://github.com/PaddlePaddle/models/tree/develop/v2/dssm)
- 6.1 [Deep structured semantic model](https://github.com/PaddlePaddle/models/tree/develop/legacy/dssm)
## 7. Sequence tagging
......@@ -60,7 +60,7 @@ Given the input sequence, the sequence tagging model is one of the most basic ta
In the sequence tagging example, we describe how to train an end-to-end sequence tagging model, taking the Named Entity Recognition (NER) task as an example.
- 7.1 [Name Entity Recognition](https://github.com/PaddlePaddle/models/tree/develop/v2/sequence_tagging_for_ner)
- 7.1 [Name Entity Recognition](https://github.com/PaddlePaddle/models/tree/develop/legacy/sequence_tagging_for_ner)
## 8. Sequence to sequence learning
......@@ -68,19 +68,19 @@ Sequence-to-sequence model has a wide range of applications. This includes machi
As an example of sequence-to-sequence learning, we take the machine translation task. We demonstrate the sequence-to-sequence mapping model without attention mechanism, which is the basis for all sequence-to-sequence learning models. We will use scheduled sampling to mitigate the error accumulation problem in the RNN model, and demonstrate machine translation with an external memory mechanism.
- 8.1 [Basic Sequence-to-sequence model](https://github.com/PaddlePaddle/models/tree/develop/v2/nmt_without_attention)
- 8.1 [Basic Sequence-to-sequence model](https://github.com/PaddlePaddle/models/tree/develop/legacy/nmt_without_attention)
## 9. Image classification
For the example of image classification, we show you how to train AlexNet, VGG, GoogLeNet, ResNet, Inception-v4, Inception-Resnet-V2 and Xception models in PaddlePaddle. We also provide model conversion tools that convert Caffe- or TensorFlow-trained model files into PaddlePaddle model files.
- 9.1 [convert Caffe model file to PaddlePaddle model file](https://github.com/PaddlePaddle/models/tree/develop/v2/image_classification/caffe2paddle)
- 9.2 [convert TensorFlow model file to PaddlePaddle model file](https://github.com/PaddlePaddle/models/tree/develop/v2/image_classification/tf2paddle)
- 9.3 [AlexNet](https://github.com/PaddlePaddle/models/tree/develop/v2/image_classification)
- 9.4 [VGG](https://github.com/PaddlePaddle/models/tree/develop/v2/image_classification)
- 9.5 [Residual Network](https://github.com/PaddlePaddle/models/tree/develop/v2/image_classification)
- 9.6 [Inception-v4](https://github.com/PaddlePaddle/models/tree/develop/v2/image_classification)
- 9.7 [Inception-Resnet-V2](https://github.com/PaddlePaddle/models/tree/develop/v2/image_classification)
- 9.8 [Xception](https://github.com/PaddlePaddle/models/tree/develop/v2/image_classification)
- 9.1 [convert Caffe model file to PaddlePaddle model file](https://github.com/PaddlePaddle/models/tree/develop/legacy/image_classification/caffe2paddle)
- 9.2 [convert TensorFlow model file to PaddlePaddle model file](https://github.com/PaddlePaddle/models/tree/develop/legacy/image_classification/tf2paddle)
- 9.3 [AlexNet](https://github.com/PaddlePaddle/models/tree/develop/legacy/image_classification)
- 9.4 [VGG](https://github.com/PaddlePaddle/models/tree/develop/legacy/image_classification)
- 9.5 [Residual Network](https://github.com/PaddlePaddle/models/tree/develop/legacy/image_classification)
- 9.6 [Inception-v4](https://github.com/PaddlePaddle/models/tree/develop/legacy/image_classification)
- 9.7 [Inception-Resnet-V2](https://github.com/PaddlePaddle/models/tree/develop/legacy/image_classification)
- 9.8 [Xception](https://github.com/PaddlePaddle/models/tree/develop/legacy/image_classification)
This tutorial is contributed by [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) and licensed under the [Apache-2.0 license](LICENSE).