提交 be80e25e 编写于 作者: Y Yibing Liu

Merge branch 'develop' of https://github.com/PaddlePaddle/models into dam_py3

...@@ -8,7 +8,7 @@ PaddlePaddle provides a rich set of computational units to enable users to adopt ...@@ -8,7 +8,7 @@ PaddlePaddle provides a rich set of computational units to enable users to adopt
- [fluid models](fluid): use PaddlePaddle's Fluid APIs. We especially recommend that users use Fluid models. - [fluid models](fluid): use PaddlePaddle's Fluid APIs. We especially recommend that users use Fluid models.
- [v2 models](v2): use PaddlePaddle's v2 APIs. - [legacy models](legacy): use PaddlePaddle's v2 APIs.
## License ## License
......
...@@ -2,7 +2,6 @@ from __future__ import absolute_import ...@@ -2,7 +2,6 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import paddle.v2 as paddle
import paddle.fluid as fluid import paddle.fluid as fluid
......
...@@ -158,7 +158,8 @@ class DuelingDQNModel(object): ...@@ -158,7 +158,8 @@ class DuelingDQNModel(object):
for i, var in enumerate(policy_vars): for i, var in enumerate(policy_vars):
sync_op = fluid.layers.assign(policy_vars[i], target_vars[i]) sync_op = fluid.layers.assign(policy_vars[i], target_vars[i])
sync_ops.append(sync_op) sync_ops.append(sync_op)
sync_program = sync_program.prune(sync_ops) # The prune API is deprecated, please don't use it any more.
sync_program = sync_program._prune(sync_ops)
return sync_program return sync_program
def act(self, state, train_or_test): def act(self, state, train_or_test):
......
...@@ -9,7 +9,7 @@ import gym ...@@ -9,7 +9,7 @@ import gym
from gym import spaces from gym import spaces
from gym.envs.atari.atari_env import ACTION_MEANING from gym.envs.atari.atari_env import ACTION_MEANING
from ale_python_interface import ALEInterface from atari_py import ALEInterface
__all__ = ['AtariPlayer'] __all__ = ['AtariPlayer']
......
""" """
CNN on mnist data using fluid api of paddlepaddle CNN on mnist data using fluid api of paddlepaddle
""" """
import paddle.v2 as paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
......
...@@ -8,7 +8,7 @@ sys.path.append("..") ...@@ -8,7 +8,7 @@ sys.path.append("..")
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.v2 as paddle import paddle
from advbox.adversary import Adversary from advbox.adversary import Adversary
from advbox.attacks.gradient_method import BIM from advbox.attacks.gradient_method import BIM
......
...@@ -8,7 +8,7 @@ sys.path.append("..") ...@@ -8,7 +8,7 @@ sys.path.append("..")
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.v2 as paddle import paddle
from advbox.adversary import Adversary from advbox.adversary import Adversary
from advbox.attacks.deepfool import DeepFoolAttack from advbox.attacks.deepfool import DeepFoolAttack
......
...@@ -8,7 +8,7 @@ sys.path.append("..") ...@@ -8,7 +8,7 @@ sys.path.append("..")
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.v2 as paddle import paddle
from advbox.adversary import Adversary from advbox.adversary import Adversary
from advbox.attacks.gradient_method import FGSM from advbox.attacks.gradient_method import FGSM
......
...@@ -7,7 +7,7 @@ sys.path.append("..") ...@@ -7,7 +7,7 @@ sys.path.append("..")
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.v2 as paddle import paddle
from advbox.adversary import Adversary from advbox.adversary import Adversary
from advbox.attacks.gradient_method import ILCM from advbox.attacks.gradient_method import ILCM
......
...@@ -7,7 +7,7 @@ sys.path.append("..") ...@@ -7,7 +7,7 @@ sys.path.append("..")
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.v2 as paddle import paddle
from advbox.adversary import Adversary from advbox.adversary import Adversary
from advbox.attacks.saliency import JSMA from advbox.attacks.saliency import JSMA
......
...@@ -7,7 +7,7 @@ sys.path.append("..") ...@@ -7,7 +7,7 @@ sys.path.append("..")
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.v2 as paddle import paddle
from advbox.adversary import Adversary from advbox.adversary import Adversary
from advbox.attacks.lbfgs import LBFGS from advbox.attacks.lbfgs import LBFGS
......
...@@ -9,7 +9,7 @@ sys.path.append("..") ...@@ -9,7 +9,7 @@ sys.path.append("..")
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.v2 as paddle import paddle
from advbox.adversary import Adversary from advbox.adversary import Adversary
from advbox.attacks.gradient_method import MIFGSM from advbox.attacks.gradient_method import MIFGSM
......
...@@ -55,6 +55,12 @@ for more detailed explanation about the arguments, please run ...@@ -55,6 +55,12 @@ for more detailed explanation about the arguments, please run
python ../train_and_evaluate.py --help python ../train_and_evaluate.py --help
``` ```
By default, training runs on a single GPU. To switch to multi-GPU mode, simply change the visible devices set in `train.sh`, e.g.,
```
export CUDA_VISIBLE_DEVICES=0,1,2,3
```
4) Run test by 4) Run test by
``` ```
......
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0
python -u ../test_and_evaluate.py --use_cuda \ python -u ../test_and_evaluate.py --use_cuda \
--ext_eval \ --ext_eval \
--data_path ./data/data.pkl \ --data_path ./data/data.pkl \
--save_path ./eval_10000 \ --save_path ./eval_3900 \
--model_path models/step_10000 \ --model_path models/step_3900 \
--batch_size 100 \ --channel1_num 16 \
--batch_size 200 \
--vocab_size 172130 \ --vocab_size 172130 \
--emb_size 200 \ --emb_size 200 \
--_EOS_ 1 --_EOS_ 1
......
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0
export FLAGS_eager_delete_tensor_gb=0.0
python -u ../train_and_evaluate.py --use_cuda \ python -u ../train_and_evaluate.py --use_cuda \
--data_path ./data/data.pkl \ --data_path ./data/data.pkl \
--ext_eval \ --ext_eval \
--word_emb_init ./data/word_embedding.pkl \ --word_emb_init ./data/word_embedding.pkl \
--save_path ./models \ --save_path ./models \
--batch_size 100 \ --batch_size 256 \
--vocab_size 172130 \ --vocab_size 172130 \
--channel1_num 16 \
--emb_size 200 \ --emb_size 200 \
--_EOS_ 1 --_EOS_ 1
...@@ -6,18 +6,25 @@ import utils.layers as layers ...@@ -6,18 +6,25 @@ import utils.layers as layers
class Net(object): class Net(object):
def __init__(self, max_turn_num, max_turn_len, vocab_size, emb_size, def __init__(self, max_turn_num, max_turn_len, vocab_size, emb_size,
stack_num): stack_num, channel1_num, channel2_num):
self._max_turn_num = max_turn_num self._max_turn_num = max_turn_num
self._max_turn_len = max_turn_len self._max_turn_len = max_turn_len
self._vocab_size = vocab_size self._vocab_size = vocab_size
self._emb_size = emb_size self._emb_size = emb_size
self._stack_num = stack_num self._stack_num = stack_num
self._channel1_num = channel1_num
self._channel2_num = channel2_num
self.word_emb_name = "shared_word_emb" self.word_emb_name = "shared_word_emb"
self.use_stack_op = True self.use_stack_op = True
self.use_mask_cache = True self.use_mask_cache = True
self.use_sparse_embedding = True self.use_sparse_embedding = True
def set_word_embedding(self, word_emb, place):
word_emb_param = fluid.global_scope().find_var(
self.word_emb_name).get_tensor()
word_emb_param.set(word_emb, place)
def create_network(self): def create_network(self):
mask_cache = dict() if self.use_mask_cache else None mask_cache = dict() if self.use_mask_cache else None
...@@ -136,7 +143,7 @@ class Net(object): ...@@ -136,7 +143,7 @@ class Net(object):
t_a_r = fluid.layers.concat(input=t_a_r_stack, axis=1) t_a_r = fluid.layers.concat(input=t_a_r_stack, axis=1)
r_a_t = fluid.layers.concat(input=r_a_t_stack, axis=1) r_a_t = fluid.layers.concat(input=r_a_t_stack, axis=1)
# sim shape: [batch_size, 2*(stack_num+2), max_turn_len, max_turn_len] # sim shape: [batch_size, 2*(stack_num+1), max_turn_len, max_turn_len]
sim = fluid.layers.matmul( sim = fluid.layers.matmul(
x=t_a_r, y=r_a_t, transpose_y=True, alpha=1 / np.sqrt(200.0)) x=t_a_r, y=r_a_t, transpose_y=True, alpha=1 / np.sqrt(200.0))
sim_turns.append(sim) sim_turns.append(sim)
...@@ -147,10 +154,9 @@ class Net(object): ...@@ -147,10 +154,9 @@ class Net(object):
for index in six.moves.xrange(len(sim_turns)): for index in six.moves.xrange(len(sim_turns)):
sim_turns[index] = fluid.layers.unsqueeze( sim_turns[index] = fluid.layers.unsqueeze(
input=sim_turns[index], axes=[2]) input=sim_turns[index], axes=[2])
# sim shape: [batch_size, 2*(stack_num+2), max_turn_num, max_turn_len, max_turn_len] # sim shape: [batch_size, 2*(stack_num+1), max_turn_num, max_turn_len, max_turn_len]
sim = fluid.layers.concat(input=sim_turns, axis=2) sim = fluid.layers.concat(input=sim_turns, axis=2)
# for douban final_info = layers.cnn_3d(sim, self._channel1_num, self._channel2_num)
final_info = layers.cnn_3d(sim, 32, 16)
loss, logits = layers.loss(final_info, label) loss, logits = layers.loss(final_info, label)
return loss, logits return loss, logits
...@@ -89,6 +89,16 @@ def parse_args(): ...@@ -89,6 +89,16 @@ def parse_args():
type=int, type=int,
default=5, default=5,
help='The number of stacked attentive modules in network.') help='The number of stacked attentive modules in network.')
parser.add_argument(
'--channel1_num',
type=int,
default=32,
help="The channels' number of the 1st conv3d layer's output.")
parser.add_argument(
'--channel2_num',
type=int,
default=16,
help="The channels' number of the 2nd conv3d layer's output.")
args = parser.parse_args() args = parser.parse_args()
return args return args
...@@ -110,7 +120,8 @@ def test(args): ...@@ -110,7 +120,8 @@ def test(args):
} }
dam = Net(args.max_turn_num, args.max_turn_len, args.vocab_size, dam = Net(args.max_turn_num, args.max_turn_len, args.vocab_size,
args.emb_size, args.stack_num) args.emb_size, args.stack_num, args.channel1_num,
args.channel2_num)
loss, logits = dam.create_network() loss, logits = dam.create_network()
loss.persistable = True loss.persistable = True
......
...@@ -88,6 +88,16 @@ def parse_args(): ...@@ -88,6 +88,16 @@ def parse_args():
type=int, type=int,
default=5, default=5,
help='The number of stacked attentive modules in network.') help='The number of stacked attentive modules in network.')
parser.add_argument(
'--channel1_num',
type=int,
default=32,
help="The channels' number of the 1st conv3d layer's output.")
parser.add_argument(
'--channel2_num',
type=int,
default=16,
help="The channels' number of the 2nd conv3d layer's output.")
args = parser.parse_args() args = parser.parse_args()
return args return args
...@@ -105,7 +115,8 @@ def train(args): ...@@ -105,7 +115,8 @@ def train(args):
} }
dam = Net(args.max_turn_num, args.max_turn_len, args.vocab_size, dam = Net(args.max_turn_num, args.max_turn_len, args.vocab_size,
args.emb_size, args.stack_num) args.emb_size, args.stack_num, args.channel1_num,
args.channel2_num)
loss, logits = dam.create_network() loss, logits = dam.create_network()
loss.persistable = True loss.persistable = True
...@@ -136,6 +147,9 @@ def train(args): ...@@ -136,6 +147,9 @@ def train(args):
dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count())) dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
print("device count %d" % dev_count) print("device count %d" % dev_count)
print("theoretical memory usage: ")
print(fluid.contrib.memory_usage(
program=train_program, batch_size=args.batch_size))
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
...@@ -157,7 +171,8 @@ def train(args): ...@@ -157,7 +171,8 @@ def train(args):
print("start loading word embedding init ...") print("start loading word embedding init ...")
word_emb = np.array(pickle.load(open(args.word_emb_init, 'rb'))).astype( word_emb = np.array(pickle.load(open(args.word_emb_init, 'rb'))).astype(
'float32') 'float32')
print("finish loading word embedding init ...") dam.set_word_embedding(word_emb, place)
print("finish init word embedding ...")
print("start loading data ...") print("start loading data ...")
train_data, val_data, test_data = pickle.load(open(args.data_path, 'rb')) train_data, val_data, test_data = pickle.load(open(args.data_path, 'rb'))
...@@ -171,8 +186,6 @@ def train(args): ...@@ -171,8 +186,6 @@ def train(args):
print_step = max(1, batch_num / (dev_count * 100)) print_step = max(1, batch_num / (dev_count * 100))
save_step = max(1, batch_num / (dev_count * 10)) save_step = max(1, batch_num / (dev_count * 10))
word_emb_inited = False
print("begin model training ...") print("begin model training ...")
print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))) print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
...@@ -187,12 +200,8 @@ def train(args): ...@@ -187,12 +200,8 @@ def train(args):
for dev in xrange(dev_count): for dev in xrange(dev_count):
index = it * dev_count + dev index = it * dev_count + dev
feed_dict = reader.make_one_batch_input(train_batches, index) feed_dict = reader.make_one_batch_input(train_batches, index)
if word_emb_inited is False and args.word_emb_init is not None:
feed_dict[dam.word_emb_name] = word_emb
feed_list.append(feed_dict) feed_list.append(feed_dict)
word_emb_inited = True
cost = train_exe.run(feed=feed_list, fetch_list=[loss.name]) cost = train_exe.run(feed=feed_list, fetch_list=[loss.name])
ave_cost += np.array(cost[0]).mean() ave_cost += np.array(cost[0]).mean()
......
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0
python -u ../test_and_evaluate.py --use_cuda \ python -u ../test_and_evaluate.py --use_cuda \
--data_path ./data/data.pkl \ --data_path ./data/data.pkl \
--save_path ./ \ --save_path ./step_3900 \
--model_path models/step_10000 \ --model_path ./models/step_3900 \
--batch_size 100 \ --batch_size 200 \
--vocab_size 434512 \ --vocab_size 434512 \
--emb_size 200 \ --emb_size 200 \
--_EOS_ 28270 --_EOS_ 28270
......
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0
export FLAGS_eager_delete_tensor_gb=0.0
python -u ../train_and_evaluate.py --use_cuda \ python -u ../train_and_evaluate.py --use_cuda \
--data_path ./data/data.pkl \ --data_path ./data/data.pkl \
--word_emb_init ./data/word_embedding.pkl \ --word_emb_init ./data/word_embedding.pkl \
--save_path ./models \ --save_path ./models \
--batch_size 100 \ --batch_size 256 \
--vocab_size 434512 \ --vocab_size 434512 \
--emb_size 200 \ --emb_size 200 \
--_EOS_ 28270 --_EOS_ 28270
......
deeplabv3plus_xception65_initialize.params
deeplabv3plus.params
deeplabv3plus.tar.gz
DeepLab运行本目录下的程序示例需要使用PaddlePaddle develop最新版本。如果您的PaddlePaddle安装版本低于此要求,请按照[安装文档](http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/pip_install_cn.html)中的说明更新PaddlePaddle安装版本。 DeepLab运行本目录下的程序示例需要使用PaddlePaddle Fluid v1.0.0版本或以上。如果您的PaddlePaddle安装版本低于此要求,请按照安装文档中的说明更新PaddlePaddle安装版本,如果使用GPU,该程序需要使用cuDNN v7版本。
## 代码结构 ## 代码结构
...@@ -41,10 +41,12 @@ data/cityscape/ ...@@ -41,10 +41,12 @@ data/cityscape/
如果需要从头开始训练模型,用户需要下载我们的初始化模型 如果需要从头开始训练模型,用户需要下载我们的初始化模型
``` ```
wget http://paddlemodels.cdn.bcebos.com/deeplab/deeplabv3plus_xception65_initialize.tar.gz wget http://paddlemodels.cdn.bcebos.com/deeplab/deeplabv3plus_xception65_initialize.tar.gz
tar -xf deeplabv3plus_xception65_initialize.tar.gz && rm deeplabv3plus_xception65_initialize.tar.gz
``` ```
如果需要最终训练模型进行fine tune或者直接用于预测,请下载我们的最终模型 如果需要最终训练模型进行fine tune或者直接用于预测,请下载我们的最终模型
``` ```
wget http://paddlemodels.cdn.bcebos.com/deeplab/deeplabv3plus.tar.gz wget http://paddlemodels.cdn.bcebos.com/deeplab/deeplabv3plus.tar.gz
tar -xf deeplabv3plus.tar.gz && rm deeplabv3plus.tar.gz
``` ```
...@@ -70,11 +72,11 @@ python train.py --help ...@@ -70,11 +72,11 @@ python train.py --help
``` ```
python ./train.py \ python ./train.py \
--batch_size=8 \ --batch_size=8 \
--parallel=true --parallel=true \
--train_crop_size=769 \ --train_crop_size=769 \
--total_step=90000 \ --total_step=90000 \
--init_weights_path=$INIT_WEIGHTS_PATH \ --init_weights_path=deeplabv3plus_xception65_initialize.params \
--save_weights_path=$SAVE_WEIGHTS_PATH \ --save_weights_path=output \
--dataset_path=$DATASET_PATH --dataset_path=$DATASET_PATH
``` ```
...@@ -82,11 +84,10 @@ python ./train.py \ ...@@ -82,11 +84,10 @@ python ./train.py \
执行以下命令在`Cityscape`测试数据集上进行测试: 执行以下命令在`Cityscape`测试数据集上进行测试:
``` ```
python ./eval.py \ python ./eval.py \
--init_weights_path=$INIT_WEIGHTS_PATH \ --init_weights=deeplabv3plus.params \
--dataset_path=$DATASET_PATH --dataset_path=$DATASET_PATH
``` ```
需要通过选项`--model_path`指定模型文件。 需要通过选项`--model_path`指定模型文件。测试脚本的输出的评估指标为mean IoU。
测试脚本的输出的评估指标为[mean IoU]()。
## 实验结果 ## 实验结果
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os import os
os.environ['FLAGS_fraction_of_gpu_memory_to_use'] = '0.98' os.environ['FLAGS_fraction_of_gpu_memory_to_use'] = '0.98'
...@@ -91,7 +94,7 @@ exe = fluid.Executor(place) ...@@ -91,7 +94,7 @@ exe = fluid.Executor(place)
exe.run(sp) exe.run(sp)
if args.init_weights_path: if args.init_weights_path:
print "load from:", args.init_weights_path print("load from:", args.init_weights_path)
load_model() load_model()
dataset = CityscapeDataset(args.dataset_path, 'val') dataset = CityscapeDataset(args.dataset_path, 'val')
...@@ -118,7 +121,7 @@ for i, imgs, labels, names in batches: ...@@ -118,7 +121,7 @@ for i, imgs, labels, names in batches:
mp = (wrong + right) != 0 mp = (wrong + right) != 0
miou2 = np.mean((right[mp] * 1.0 / (right[mp] + wrong[mp]))) miou2 = np.mean((right[mp] * 1.0 / (right[mp] + wrong[mp])))
if args.verbose: if args.verbose:
print 'step: %s, mIoU: %s' % (i + 1, miou2) print('step: %s, mIoU: %s' % (i + 1, miou2))
else: else:
print '\rstep: %s, mIoU: %s' % (i + 1, miou2), print('\rstep: %s, mIoU: %s' % (i + 1, miou2))
sys.stdout.flush() sys.stdout.flush()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -50,7 +53,7 @@ def append_op_result(result, name): ...@@ -50,7 +53,7 @@ def append_op_result(result, name):
def conv(*args, **kargs): def conv(*args, **kargs):
kargs['param_attr'] = name_scope + 'weights' kargs['param_attr'] = name_scope + 'weights'
if kargs.has_key('bias_attr') and kargs['bias_attr']: if 'bias_attr' in kargs and kargs['bias_attr']:
kargs['bias_attr'] = name_scope + 'biases' kargs['bias_attr'] = name_scope + 'biases'
else: else:
kargs['bias_attr'] = False kargs['bias_attr'] = False
...@@ -62,7 +65,7 @@ def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None): ...@@ -62,7 +65,7 @@ def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None):
N, C, H, W = input.shape N, C, H, W = input.shape
if C % G != 0: if C % G != 0:
print "group can not divide channle:", C, G print("group can not divide channle:", C, G)
for d in range(10): for d in range(10):
for t in [d, -d]: for t in [d, -d]:
if G + t <= 0: continue if G + t <= 0: continue
...@@ -70,7 +73,7 @@ def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None): ...@@ -70,7 +73,7 @@ def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None):
G = G + t G = G + t
break break
if C % G == 0: if C % G == 0:
print "use group size:", G print("use group size:", G)
break break
assert C % G == 0 assert C % G == 0
param_shape = (G, ) param_shape = (G, )
...@@ -139,7 +142,7 @@ def seq_conv(input, channel, stride, filter, dilation=1, act=None): ...@@ -139,7 +142,7 @@ def seq_conv(input, channel, stride, filter, dilation=1, act=None):
filter, filter,
stride, stride,
groups=input.shape[1], groups=input.shape[1],
padding=(filter / 2) * dilation, padding=(filter // 2) * dilation,
dilation=dilation) dilation=dilation)
input = bn(input) input = bn(input)
if act: input = act(input) if act: input = act(input)
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2 import cv2
import numpy as np import numpy as np
import os
import six
default_config = { default_config = {
"shuffle": True, "shuffle": True,
...@@ -30,7 +35,7 @@ def slice_with_pad(a, s, value=0): ...@@ -30,7 +35,7 @@ def slice_with_pad(a, s, value=0):
pr = 0 pr = 0
pads.append([pl, pr]) pads.append([pl, pr])
slices.append([l, r]) slices.append([l, r])
slices = map(lambda x: slice(x[0], x[1], 1), slices) slices = list(map(lambda x: slice(x[0], x[1], 1), slices))
a = a[slices] a = a[slices]
a = np.pad(a, pad_width=pads, mode='constant', constant_values=value) a = np.pad(a, pad_width=pads, mode='constant', constant_values=value)
return a return a
...@@ -38,11 +43,17 @@ def slice_with_pad(a, s, value=0): ...@@ -38,11 +43,17 @@ def slice_with_pad(a, s, value=0):
class CityscapeDataset: class CityscapeDataset:
def __init__(self, dataset_dir, subset='train', config=default_config): def __init__(self, dataset_dir, subset='train', config=default_config):
import commands label_dirname = os.path.join(dataset_dir, 'gtFine/' + subset)
label_dirname = dataset_dir + 'gtFine/' + subset if six.PY2:
label_files = commands.getoutput( import commands
"find %s -type f | grep labelTrainIds | sort" % label_files = commands.getoutput(
label_dirname).splitlines() "find %s -type f | grep labelTrainIds | sort" %
label_dirname).splitlines()
else:
import subprocess
label_files = subprocess.getstatusoutput(
"find %s -type f | grep labelTrainIds | sort" %
label_dirname)[-1].splitlines()
self.label_files = label_files self.label_files = label_files
self.label_dirname = label_dirname self.label_dirname = label_dirname
self.index = 0 self.index = 0
...@@ -50,7 +61,7 @@ class CityscapeDataset: ...@@ -50,7 +61,7 @@ class CityscapeDataset:
self.dataset_dir = dataset_dir self.dataset_dir = dataset_dir
self.config = config self.config = config
self.reset() self.reset()
print "total number", len(label_files) print("total number", len(label_files))
def reset(self, shuffle=False): def reset(self, shuffle=False):
self.index = 0 self.index = 0
...@@ -66,13 +77,14 @@ class CityscapeDataset: ...@@ -66,13 +77,14 @@ class CityscapeDataset:
shape = self.config["crop_size"] shape = self.config["crop_size"]
while True: while True:
ln = self.label_files[self.index] ln = self.label_files[self.index]
img_name = self.dataset_dir + 'leftImg8bit/' + self.subset + ln[len( img_name = os.path.join(
self.label_dirname):] self.dataset_dir,
'leftImg8bit/' + self.subset + ln[len(self.label_dirname):])
img_name = img_name.replace('gtFine_labelTrainIds', 'leftImg8bit') img_name = img_name.replace('gtFine_labelTrainIds', 'leftImg8bit')
label = cv2.imread(ln) label = cv2.imread(ln)
img = cv2.imread(img_name) img = cv2.imread(img_name)
if img is None: if img is None:
print "load img failed:", img_name print("load img failed:", img_name)
self.next_img() self.next_img()
else: else:
break break
...@@ -128,5 +140,7 @@ class CityscapeDataset: ...@@ -128,5 +140,7 @@ class CityscapeDataset:
from prefetch_generator import BackgroundGenerator from prefetch_generator import BackgroundGenerator
batches = BackgroundGenerator(batches, 100) batches = BackgroundGenerator(batches, 100)
except: except:
print "You can install 'prefetch_generator' for acceleration of data reading." print(
"You can install 'prefetch_generator' for acceleration of data reading."
)
return batches return batches
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os import os
os.environ['FLAGS_fraction_of_gpu_memory_to_use'] = '0.98' os.environ['FLAGS_fraction_of_gpu_memory_to_use'] = '0.98'
...@@ -126,13 +129,12 @@ exe = fluid.Executor(place) ...@@ -126,13 +129,12 @@ exe = fluid.Executor(place)
exe.run(sp) exe.run(sp)
if args.init_weights_path: if args.init_weights_path:
print "load from:", args.init_weights_path print("load from:", args.init_weights_path)
load_model() load_model()
dataset = CityscapeDataset(args.dataset_path, 'train') dataset = CityscapeDataset(args.dataset_path, 'train')
if args.parallel: if args.parallel:
print "Using ParallelExecutor."
exe_p = fluid.ParallelExecutor( exe_p = fluid.ParallelExecutor(
use_cuda=True, loss_name=loss_mean.name, main_program=tp) use_cuda=True, loss_name=loss_mean.name, main_program=tp)
...@@ -149,9 +151,9 @@ for i, imgs, labels, names in batches: ...@@ -149,9 +151,9 @@ for i, imgs, labels, names in batches:
'label': labels}, 'label': labels},
fetch_list=[pred, loss_mean]) fetch_list=[pred, loss_mean])
if i % 100 == 0: if i % 100 == 0:
print "Model is saved to", args.save_weights_path print("Model is saved to", args.save_weights_path)
save_model() save_model()
print "step %s, loss: %s" % (i, np.mean(retv[1])) print("step %s, loss: %s" % (i, np.mean(retv[1])))
print "Training done. Model is saved to", args.save_weights_path print("Training done. Model is saved to", args.save_weights_path)
save_model() save_model()
...@@ -10,3 +10,4 @@ output* ...@@ -10,3 +10,4 @@ output*
pred pred
eval_tools eval_tools
box* box*
PyramidBox_WiderFace*
...@@ -427,6 +427,7 @@ class PyramidBox(object): ...@@ -427,6 +427,7 @@ class PyramidBox(object):
overlap_threshold=0.35, overlap_threshold=0.35,
neg_overlap=0.35) neg_overlap=0.35)
loss = fluid.layers.reduce_sum(loss) loss = fluid.layers.reduce_sum(loss)
loss.persistable = True
return loss return loss
def train(self): def train(self):
......
...@@ -189,13 +189,13 @@ def train(args, config, train_params, train_file_list): ...@@ -189,13 +189,13 @@ def train(args, config, train_params, train_file_list):
fetch_vars = [np.mean(np.array(v)) for v in fetch_vars] fetch_vars = [np.mean(np.array(v)) for v in fetch_vars]
if batch_id % 10 == 0: if batch_id % 10 == 0:
if not args.use_pyramidbox: if not args.use_pyramidbox:
print("Pass {0}, batch {1}, loss {2}, time {3}".format( print("Pass {:d}, batch {:d}, loss {:.6f}, time {:.5f}".format(
pass_id, batch_id, fetch_vars[0], pass_id, batch_id, fetch_vars[0],
start_time - prev_start_time)) start_time - prev_start_time))
else: else:
print("Pass {0}, batch {1}, face loss {2}, " \ print("Pass {:d}, batch {:d}, face loss {:.6f}, " \
"head loss {3}, " \ "head loss {:.6f}, " \
"time {4}".format(pass_id, "time {:.5f}".format(pass_id,
batch_id, fetch_vars[0], fetch_vars[1], batch_id, fetch_vars[0], fetch_vars[1],
start_time - prev_start_time)) start_time - prev_start_time))
if pass_id % 1 == 0 or pass_id == epoc_num - 1: if pass_id % 1 == 0 or pass_id == epoc_num - 1:
......
...@@ -82,9 +82,6 @@ def save_widerface_bboxes(image_path, bboxes_scores, output_dir): ...@@ -82,9 +82,6 @@ def save_widerface_bboxes(image_path, bboxes_scores, output_dir):
image_name = image_path.split('/')[-1] image_name = image_path.split('/')[-1]
image_class = image_path.split('/')[-2] image_class = image_path.split('/')[-2]
image_name = image_name.encode('utf-8')
image_class = image_class.encode('utf-8')
odir = os.path.join(output_dir, image_class) odir = os.path.join(output_dir, image_class)
if not os.path.exists(odir): if not os.path.exists(odir):
os.makedirs(odir) os.makedirs(odir)
......
# Faster RCNN Object Detection
---
## Table of Contents
- [Installation](#installation)
- [Introduction](#introduction)
- [Data preparation](#data-preparation)
- [Training](#training)
- [Finetuning](#finetuning)
- [Evaluation](#evaluation)
- [Inference and Visualization](#inference-and-visualization)
- [Appendix](#appendix)
## Installation
Running the sample code in this directory requires PaddlePaddle Fluid v1.0.0 or later. If the PaddlePaddle version on your device is lower than this, please follow the instructions in the [installation document](http://www.paddlepaddle.org/documentation/docs/zh/0.15.0/beginners_guide/install/install_doc.html#paddlepaddle) to update it.
## Introduction
[Faster RCNN](https://arxiv.org/abs/1506.01497) is a typical two-stage detector. The overall network can be divided into four parts, as shown below:
<p align="center">
<img src="image/Faster_RCNN.jpg" height=400 width=400 hspace='10'/> <br />
Faster RCNN model
</p>
1. Base conv layer. As a CNN-based object detection method, Faster RCNN extracts feature maps using a basic convolutional network. The feature maps are then shared by the RPN and the fc layers. This sample uses [ResNet-50](https://arxiv.org/abs/1512.03385) as the base conv layer.
2. Region Proposal Network (RPN). The RPN generates proposals for detection. This block generates anchors from a set of sizes and ratios and classifies them as foreground or background with softmax. It then refines the anchors with box regression to obtain more precise proposals.
3. RoI pooling. This layer takes feature maps and proposals as input. The proposals are mapped onto the feature maps and pooled to the same size. The outputs are sent to the fc layers for classification and regression.
4. Detection layer. It uses the output of RoI pooling to compute the class and location of each proposal in two fc layers.
## Data preparation
Train the model on [MS-COCO dataset](http://cocodataset.org/#download), download dataset as below:
cd dataset/coco
./download.sh
## Training
After data preparation, one can start the training step by:
python train.py \
--max_size=1333 \
--scales=800 \
--batch_size=8 \
--model_save_dir=output/
- Set ```export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7``` to specify 8 GPUs for training.
- For more help on arguments:
python train.py --help
**download the pre-trained model:** This sample provides Resnet-50 pre-trained model which is converted from Caffe. The model fuses the parameters in batch normalization layer. One can download pre-trained model as:
sh ./pretrained/download.sh
Set `pretrained_model` to load pre-trained model. In addition, this parameter is used to load trained model when finetuning as well.
**data reader introduction:**
* Data reader is defined in `reader.py`.
* The short side of every image is scaled to `scales`. If the long side is then larger than `max_size`, the long side is scaled to `max_size`.
* In training stage, images are horizontally flipped.
* Images in the same batch can be padded to the same size.
**model configuration:**
* Use RoIPooling.
* NMS threshold=0.7. During training, pre\_nms=12000, post\_nms=2000; during test, pre\_nms=6000, post\_nms=1000.
* In generating proposal labels, fg\_fraction=0.25, fg\_thresh=0.5, bg\_thresh\_hi=0.5, bg\_thresh\_lo=0.0.
* In rpn target assignment, rpn\_fg\_fraction=0.5, rpn\_positive\_overlap=0.7, rpn\_negative\_overlap=0.3.
**training strategy:**
* Use momentum optimizer with momentum=0.9.
* Weight decay is 0.0001.
* In the first 500 iterations, the learning rate increases linearly from 0.00333 to 0.01. It is then decayed at iterations 120000 and 160000 with multipliers 0.1 and 0.01, respectively. The maximum number of iterations is 180000.
* In non-base convolutional layers, the learning rate of the bias is set to twice the global learning rate.
* In the base convolutional layers, the parameters of the affine layers and the res body are not updated.
* With 8 Nvidia Tesla V100 GPUs, the total training time is about 40 hours.
Training result is shown as below:
<p align="center">
<img src="image/train_loss.jpg" height=500 width=650 hspace='10'/> <br />
Faster RCNN train loss
</p>
* Fluid all padding: Each image is padded to 1333\*1333.
* Fluid minibatch padding: Images in one batch are padded to the same size. This method is the same as in Detectron.
* Fluid no padding: Images are not padded.
## Finetuning
Finetuning adapts model weights to a specific task, starting from pretrained weights. After setting ```pretrained_model```, one can finetune a model as follows:
python train.py \
--max_size=1333 \
--scales=800 \
--pretrained_model=${path_to_pretrain_model} \
--batch_size=8 \
--model_save_dir=output/
## Evaluation
Evaluation is to evaluate the performance of a trained model. This sample provides `eval_coco_map.py` which uses a COCO-specific mAP metric defined by [COCO committee](http://cocodataset.org/#detections-eval). To use `eval_coco_map.py` , [cocoapi](https://github.com/cocodataset/cocoapi) is needed. Install the cocoapi:
# COCOAPI=/path/to/clone/cocoapi
git clone https://github.com/cocodataset/cocoapi.git $COCOAPI
cd $COCOAPI/PythonAPI
# if cython is not installed
pip install Cython
# Install into global site-packages
make install
# Alternatively, if you do not have permissions or prefer
# not to install the COCO API into global site-packages
python2 setup.py install --user
`eval_coco_map.py` is the main executor for evaluation. One can start the evaluation step by:
python eval_coco_map.py \
--dataset=coco2017 \
--pretrained_model=${path_to_pretrain_model} \
--batch_size=1 \
--nms_threshold=0.5 \
--score_threshold=0.05
Evaluation result is shown as below:
<p align="center">
<img src="image/mAP.jpg" height=500 width=650 hspace='10'/> <br />
Faster RCNN mAP
</p>
| Model | Batch size | Max iteration | mAP |
| :------------------------------ | :------------: | :-------------------:|------: |
| Detectron | 8 | 180000 | 0.315 |
| Fluid minibatch padding | 8 | 180000 | 0.314 |
| Fluid all padding | 8 | 180000 | 0.308 |
| Fluid no padding |6 | 240000 | 0.317 |
* Fluid all padding: Each image is padded to 1333\*1333.
* Fluid minibatch padding: Images in one batch are padded to the same size. This method is the same as in Detectron.
* Fluid no padding: Images are not padded.
## Inference and Visualization
Inference is used to get prediction scores or image features from a trained model. `infer.py` is the main executor for inference. One can start the inference step by:
python infer.py \
--dataset=coco2017 \
--pretrained_model=${path_to_pretrain_model} \
--image_path=data/COCO17/val2017/ \
--image_name=000000000139.jpg \
--draw_threshold=0.6
Visualization of infer result is shown as below:
<p align="center">
<img src="image/000000000139.jpg" height=300 width=400 hspace='10'/>
<img src="image/000000127517.jpg" height=300 width=400 hspace='10'/>
<img src="image/000000203864.jpg" height=300 width=400 hspace='10'/>
<img src="image/000000515077.jpg" height=300 width=400 hspace='10'/> <br />
Faster RCNN Visualization Examples
</p>
# Faster RCNN 目标检测
---
## 内容
- [安装](#安装)
- [简介](#简介)
- [数据准备](#数据准备)
- [模型训练](#模型训练)
- [参数微调](#参数微调)
- [模型评估](#模型评估)
- [模型推断及可视化](#模型推断及可视化)
- [附录](#附录)
## 安装
在当前目录下运行样例代码需要PaddlePaddle Fluid的v.1.0.0或以上的版本。如果你的运行环境中的PaddlePaddle低于此版本,请根据[安装文档](http://www.paddlepaddle.org/documentation/docs/zh/0.15.0/beginners_guide/install/install_doc.html#paddlepaddle)中的说明来更新PaddlePaddle。
## 简介
[Faster Rcnn](https://arxiv.org/abs/1506.01497) 是典型的两阶段目标检测器。如下图所示,整体网络可以分为4个主要内容:
<p align="center">
<img src="image/Faster_RCNN.jpg" height=400 width=400 hspace='10'/> <br />
Faster RCNN 目标检测模型
</p>
1. 基础卷积层。作为一种卷积神经网络目标检测方法,Faster RCNN首先使用一组基础的卷积网络提取图像的特征图。特征图被后续RPN层和全连接层共享。本示例采用[ResNet-50](https://arxiv.org/abs/1512.03385)作为基础卷积层。
2. 区域生成网络(RPN)。RPN网络用于生成候选区域(proposals)。该层通过一组固定的尺寸和比例得到一组锚点(anchors), 通过softmax判断锚点属于前景或者背景,再利用区域回归修正锚点从而获得精确的候选区域。
3. RoI池化。该层收集输入的特征图和候选区域,将候选区域映射到特征图中并池化为统一大小的区域特征图,送入全连接层判定目标类别。
4. 检测层。利用区域特征图计算候选区域的类别,同时再次通过区域回归获得检测框最终的精确位置。
## 数据准备
在[MS-COCO数据集](http://cocodataset.org/#download)上进行训练,通过如下方式下载数据集。
cd dataset/coco
./download.sh
## 模型训练
数据准备完毕后,可以通过如下的方式启动训练:
python train.py \
--max_size=1333 \
--scales=800 \
--batch_size=8 \
--model_save_dir=output/ \
--pretrained_model=${path_to_pretrain_model}
- 通过设置export CUDA\_VISIBLE\_DEVICES=0,1,2,3,4,5,6,7指定8卡GPU训练。
- 可选参数见:
python train.py --help
**下载预训练模型:** 本示例提供Resnet-50预训练模型,该模型转换自Caffe,并对批标准化层(Batch Normalization Layer)进行参数融合。采用如下命令下载预训练模型:
sh ./pretrained/download.sh
通过初始化`pretrained_model` 加载预训练模型。同时在参数微调时也采用该设置加载已训练模型。
**数据读取器说明:** 数据读取器定义在reader.py中。所有图像将短边等比例缩放至`scales`,若长边大于`max_size`, 则再次将长边等比例缩放至`max_size`。在训练阶段,对图像采用水平翻转。支持将同一个batch内的图像padding为相同尺寸。
**模型设置:**
* 使用RoIPooling。
* 训练过程pre\_nms=12000, post\_nms=2000,测试过程pre\_nms=6000, post\_nms=1000。nms阈值为0.7。
* RPN网络得到labels的过程中,fg\_fraction=0.25,fg\_thresh=0.5,bg\_thresh_hi=0.5,bg\_thresh\_lo=0.0
* RPN选择anchor时,rpn\_fg\_fraction=0.5,rpn\_positive\_overlap=0.7,rpn\_negative\_overlap=0.3
下图为模型训练结果:
<p align="center">
<img src="image/train_loss.jpg" height=500 width=650 hspace='10'/> <br />
Faster RCNN 训练loss
</p>
* Fluid all padding: 每张图像填充为1333\*1333大小。
* Fluid minibatch padding: 同一个batch内的图像填充为相同尺寸。该方法与detectron处理相同。
* Fluid no padding: 不对图像做填充处理。
**训练策略:**
* 采用momentum优化算法训练Faster RCNN,momentum=0.9。
* 权重衰减系数为0.0001,前500轮学习率从0.00333线性增加至0.01。在120000,160000轮时使用0.1,0.01乘子进行学习率衰减,最大训练180000轮。
* 非基础卷积层卷积bias学习率为整体学习率2倍。
* 基础卷积层中,affine_layers参数不更新,res2层参数不更新。
* 使用Nvidia Tesla V100 8卡并行,总共训练时长大约40小时。
## 模型评估
模型评估是指对训练完毕的模型评估各类性能指标。本示例采用[COCO官方评估](http://cocodataset.org/#detections-eval),使用前需要首先下载[cocoapi](https://github.com/cocodataset/cocoapi)
# COCOAPI=/path/to/clone/cocoapi
git clone https://github.com/cocodataset/cocoapi.git $COCOAPI
cd $COCOAPI/PythonAPI
# if cython is not installed
pip install Cython
# Install into global site-packages
make install
# Alternatively, if you do not have permissions or prefer
# not to install the COCO API into global site-packages
python2 setup.py install --user
`eval_coco_map.py`是评估模块的主要执行程序,调用示例如下:
python eval_coco_map.py \
--dataset=coco2017 \
--pretrained_model=${path_to_pretrain_model} \
--batch_size=1 \
--nms_threshold=0.5 \
--score_threshold=0.05
下图为模型评估结果:
<p align="center">
<img src="image/mAP.jpg" height=500 width=650 hspace='10'/> <br />
Faster RCNN mAP
</p>
| 模型 | 批量大小 | 迭代次数 | mAP |
| :------------------------------ | :------------: | :------------------: |------: |
| Detectron | 8 | 180000 | 0.315 |
| Fluid minibatch padding | 8 | 180000 | 0.314 |
| Fluid all padding | 8 | 180000 | 0.308 |
| Fluid no padding |6 | 240000 | 0.317 |
* Fluid all padding: 每张图像填充为1333\*1333大小。
* Fluid minibatch padding: 同一个batch内的图像填充为相同尺寸。该方法与detectron处理相同。
* Fluid no padding: 不对图像做填充处理。
## 模型推断及可视化
模型推断可以获取图像中的物体及其对应的类别,`infer.py`是主要执行程序,调用示例如下:
python infer.py \
--dataset=coco2017 \
--pretrained_model=${path_to_pretrain_model} \
--image_path=data/COCO17/val2017/ \
--image_name=000000000139.jpg \
--draw_threshold=0.6
下图为模型可视化预测结果:
<p align="center">
<img src="image/000000000139.jpg" height=300 width=400 hspace='10'/>
<img src="image/000000127517.jpg" height=300 width=400 hspace='10'/>
<img src="image/000000203864.jpg" height=300 width=400 hspace='10'/>
<img src="image/000000515077.jpg" height=300 width=400 hspace='10'/> <br />
Faster RCNN 预测可视化
</p>
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os import os
import time import time
import numpy as np import numpy as np
import argparse
import functools
from eval_helper import get_nmsed_box from eval_helper import get_nmsed_box
from eval_helper import get_dt_res from eval_helper import get_dt_res
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import reader import reader
from utility import print_arguments, parse_args from utility import print_arguments, parse_args
# A special mAP metric for COCO dataset, which averages AP in different IoUs.
# To use this eval_coco_map.py, [cocoapi](https://github.com/cocodataset/cocoapi) is needed.
import models.model_builder as model_builder import models.model_builder as model_builder
import models.resnet as resnet import models.resnet as resnet
import json import json
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os import os
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant from paddle.fluid.initializer import Constant
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant from paddle.fluid.initializer import Constant
......
# Download and unpack the ImageNet-pretrained ResNet-50 (fused BN) weights
# into the directory that contains this script.

# Resolve the script's own directory so the archive lands next to it
# regardless of the caller's working directory; stop if that fails so we
# never download/extract into an unexpected location.
DIR="$( cd "$(dirname "$0")" && pwd -P )" || exit 1
cd "$DIR" || exit 1

# Download the data. Abort on failure so tar is not run on a missing or
# stale archive.
echo "Downloading..."
wget http://paddlemodels.bj.bcebos.com/faster_rcnn/imagenet_resnet50_fusebn.tar.gz || exit 1

echo "Extracting..."
tar -xf imagenet_resnet50_fusebn.tar.gz || exit 1
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os import os
import time import time
import numpy as np import numpy as np
import argparse import argparse
import functools from utility import parse_args, add_arguments, print_arguments
import shutil
import cPickle
from utility import add_arguments, print_arguments
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -16,50 +27,12 @@ import models.model_builder as model_builder ...@@ -16,50 +27,12 @@ import models.model_builder as model_builder
import models.resnet as resnet import models.resnet as resnet
from learning_rate import exponential_with_warmup_decay from learning_rate import exponential_with_warmup_decay
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
# ENV
add_arg('parallel', bool, True, "Minibatch size.")
add_arg('use_gpu', bool, True, "Whether use GPU.")
add_arg('model_save_dir', str, 'model', "The path to save model.")
add_arg('pretrained_model', str, 'imagenet_resnet50_fusebn', "The init model path.")
add_arg('dataset', str, 'coco2017', "coco2014, coco2017, and pascalvoc.")
add_arg('data_dir', str, 'data/COCO17', "data directory")
add_arg('skip_reader', bool, False, "Whether to skip data reader.")
add_arg('use_profile', bool, False, "Whether to use profiler tool.")
add_arg('class_num', int, 81, "Class number.")
add_arg('use_pyreader', bool, False, "Class number.")
# SOLVER
add_arg('learning_rate', float, 0.01, "Learning rate.")
add_arg('num_iteration', int, 10, "Epoch number.")
# RPN
add_arg('anchor_sizes', int, [32,64,128,256,512], "The size of anchors.")
add_arg('aspect_ratios', float, [0.5,1.0,2.0], "The ratio of anchors.")
add_arg('variance', float, [1.,1.,1.,1.], "The variance of anchors.")
add_arg('rpn_stride', float, 16., "Stride of the feature map that RPN is attached.")
# FAST RCNN
# TRAIN TEST
add_arg('batch_size', int, 1, "Minibatch size.")
add_arg('max_size', int, 1333, "The max resized image size.")
add_arg('scales', int, [800], "The resized image height.")
add_arg('batch_size_per_im',int, 512, "fast rcnn head batch size")
add_arg('mean_value', float, [102.9801, 115.9465, 122.7717], "pixel mean")
add_arg('debug', bool, False, "Debug mode")
#yapf: enable
def train(cfg): def train(cfg):
batch_size = cfg.batch_size batch_size = cfg.batch_size
learning_rate = cfg.learning_rate learning_rate = cfg.learning_rate
image_shape = [3, cfg.max_size, cfg.max_size] image_shape = [3, cfg.max_size, cfg.max_size]
num_iterations = cfg.num_iteration num_iterations = cfg.max_iter
if cfg.debug:
fluid.default_startup_program().random_seed = 1000
fluid.default_main_program().random_seed = 1000
import random
random.seed(0)
np.random.seed(0)
devices = os.getenv("CUDA_VISIBLE_DEVICES") or "" devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
devices_num = len(devices.split(",")) devices_num = len(devices.split(","))
...@@ -72,21 +45,22 @@ def train(cfg): ...@@ -72,21 +45,22 @@ def train(cfg):
use_random=False) use_random=False)
model.build_model(image_shape) model.build_model(image_shape)
loss_cls, loss_bbox, rpn_cls_loss, rpn_reg_loss = model.loss() loss_cls, loss_bbox, rpn_cls_loss, rpn_reg_loss = model.loss()
loss_cls.persistable=True loss_cls.persistable = True
loss_bbox.persistable=True loss_bbox.persistable = True
rpn_cls_loss.persistable=True rpn_cls_loss.persistable = True
rpn_reg_loss.persistable=True rpn_reg_loss.persistable = True
loss = loss_cls + loss_bbox + rpn_cls_loss + rpn_reg_loss loss = loss_cls + loss_bbox + rpn_cls_loss + rpn_reg_loss
boundaries = [120000, 160000] boundaries = [120000, 160000]
values = [learning_rate, learning_rate*0.1, learning_rate*0.01] values = [learning_rate, learning_rate * 0.1, learning_rate * 0.01]
optimizer = fluid.optimizer.Momentum( optimizer = fluid.optimizer.Momentum(
learning_rate=exponential_with_warmup_decay(learning_rate=learning_rate, learning_rate=exponential_with_warmup_decay(
learning_rate=learning_rate,
boundaries=boundaries, boundaries=boundaries,
values=values, values=values,
warmup_iter=500, warmup_iter=500,
warmup_factor=1.0/3.0), warmup_factor=1.0 / 3.0),
regularization=fluid.regularizer.L2Decay(0.0001), regularization=fluid.regularizer.L2Decay(0.0001),
momentum=0.9) momentum=0.9)
optimizer.minimize(loss) optimizer.minimize(loss)
...@@ -98,22 +72,33 @@ def train(cfg): ...@@ -98,22 +72,33 @@ def train(cfg):
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
if cfg.pretrained_model: if cfg.pretrained_model:
def if_exist(var): def if_exist(var):
return os.path.exists(os.path.join(cfg.pretrained_model, var.name)) return os.path.exists(os.path.join(cfg.pretrained_model, var.name))
fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist) fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)
if cfg.parallel: if cfg.parallel:
train_exe = fluid.ParallelExecutor( train_exe = fluid.ParallelExecutor(
use_cuda=bool(cfg.use_gpu), loss_name=loss.name) use_cuda=bool(cfg.use_gpu), loss_name=loss.name)
assert cfg.batch_size % devices_num == 0, \
"batch_size = %d, devices_num = %d" %(cfg.batch_size, devices_num)
batch_size_per_dev = cfg.batch_size / devices_num
if cfg.use_pyreader: if cfg.use_pyreader:
train_reader = reader.train(cfg, batch_size=1, shuffle=not cfg.debug) train_reader = reader.train(
cfg,
batch_size=batch_size_per_dev,
total_batch_size=cfg.batch_size,
padding_total=cfg.padding_minibatch,
shuffle=False)
py_reader = model.py_reader py_reader = model.py_reader
py_reader.decorate_paddle_reader(train_reader) py_reader.decorate_paddle_reader(train_reader)
else: else:
train_reader = reader.train(cfg, batch_size=cfg.batch_size, shuffle=not cfg.debug) train_reader = reader.train(
feeder = fluid.DataFeeder(place=place, feed_list=model.feeds()) cfg, batch_size=cfg.batch_size, shuffle=False)
feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())
fetch_list = [loss, loss_cls, loss_bbox, rpn_cls_loss, rpn_reg_loss] fetch_list = [loss, loss_cls, loss_bbox, rpn_cls_loss, rpn_reg_loss]
...@@ -124,22 +109,27 @@ def train(cfg): ...@@ -124,22 +109,27 @@ def train(cfg):
for batch_id in range(iterations): for batch_id in range(iterations):
start_time = time.time() start_time = time.time()
data = train_reader().next() data = next(train_reader())
end_time = time.time() end_time = time.time()
reader_time.append(end_time - start_time) reader_time.append(end_time - start_time)
start_time = time.time() start_time = time.time()
losses = train_exe.run(fetch_list=[v.name for v in fetch_list], if cfg.parallel:
feed=feeder.feed(data)) losses = train_exe.run(fetch_list=[v.name for v in fetch_list],
feed=feeder.feed(data))
else:
losses = exe.run(fluid.default_main_program(),
fetch_list=[v.name for v in fetch_list],
feed=feeder.feed(data))
end_time = time.time() end_time = time.time()
run_time.append(end_time - start_time) run_time.append(end_time - start_time)
total_images += data[0][0].shape[0] total_images += len(data)
lr = np.array(fluid.global_scope().find_var('learning_rate').get_tensor()) lr = np.array(fluid.global_scope().find_var('learning_rate')
print("Batch {:d}, lr {:.6f}, loss {:.6f} ".format( .get_tensor())
batch_id, lr[0], losses[0][0])) print("Batch {:d}, lr {:.6f}, loss {:.6f} ".format(batch_id, lr[0],
losses[0][0]))
return reader_time, run_time, total_images return reader_time, run_time, total_images
def run_pyreader(iterations): def run_pyreader(iterations):
reader_time = [0] reader_time = [0]
run_time = [] run_time = []
...@@ -149,13 +139,19 @@ def train(cfg): ...@@ -149,13 +139,19 @@ def train(cfg):
try: try:
for batch_id in range(iterations): for batch_id in range(iterations):
start_time = time.time() start_time = time.time()
losses = train_exe.run(fetch_list=[v.name for v in fetch_list]) if cfg.parallel:
losses = train_exe.run(
fetch_list=[v.name for v in fetch_list])
else:
losses = exe.run(fluid.default_main_program(),
fetch_list=[v.name for v in fetch_list])
end_time = time.time() end_time = time.time()
run_time.append(end_time - start_time) run_time.append(end_time - start_time)
total_images += devices_num total_images += devices_num
lr = np.array(fluid.global_scope().find_var('learning_rate').get_tensor()) lr = np.array(fluid.global_scope().find_var('learning_rate')
print("Batch {:d}, lr {:.6f}, loss {:.6f} ".format( .get_tensor())
batch_id, lr[0], losses[0][0])) print("Batch {:d}, lr {:.6f}, loss {:.6f} ".format(batch_id, lr[
0], losses[0][0]))
except fluid.core.EOFException: except fluid.core.EOFException:
py_reader.reset() py_reader.reset()
...@@ -167,20 +163,23 @@ def train(cfg): ...@@ -167,20 +163,23 @@ def train(cfg):
run_func(2) run_func(2)
# profiling # profiling
start = time.time() start = time.time()
if cfg.use_profile: use_profile = False
if use_profile:
with profiler.profiler('GPU', 'total', '/tmp/profile_file'): with profiler.profiler('GPU', 'total', '/tmp/profile_file'):
reader_time, run_time, total_images = run(num_iterations) reader_time, run_time, total_images = run_func(num_iterations)
else: else:
reader_time, run_time, total_images = run_func(num_iterations) reader_time, run_time, total_images = run_func(num_iterations)
end = time.time() end = time.time()
total_time = end - start total_time = end - start
print("Total time: {0}, reader time: {1} s, run time: {2} s, images/s: {3}".format( print("Total time: {0}, reader time: {1} s, run time: {2} s, images/s: {3}".
total_time, np.sum(reader_time), np.sum(run_time), total_images / total_time)) format(total_time,
np.sum(reader_time),
np.sum(run_time), total_images / total_time))
if __name__ == '__main__': if __name__ == '__main__':
args = parser.parse_args() args = parse_args()
print_arguments(args) print_arguments(args)
data_args = reader.Settings(args) data_args = reader.Settings(args)
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -150,6 +150,8 @@ def coco(settings, ...@@ -150,6 +150,8 @@ def coco(settings,
else: else:
for roidb in roidbs: for roidb in roidbs:
if settings.image_name not in roidb['image']:
continue
im, im_info, im_id = roidb_reader(roidb, mode) im, im_info, im_id = roidb_reader(roidb, mode)
batch_out = [(im, im_info, im_id)] batch_out = [(im, im_info, im_id)]
yield batch_out yield batch_out
......
...@@ -26,7 +26,6 @@ from __future__ import print_function ...@@ -26,7 +26,6 @@ from __future__ import print_function
from __future__ import unicode_literals from __future__ import unicode_literals
import copy import copy
import cPickle as pickle
import logging import logging
import numpy as np import numpy as np
import os import os
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os import os
import time import sys
import numpy as np import numpy as np
import argparse import time
import functools
import shutil import shutil
import cPickle
from utility import parse_args, print_arguments, SmoothedValue from utility import parse_args, print_arguments, SmoothedValue
import paddle import paddle
...@@ -117,7 +132,7 @@ def train(cfg): ...@@ -117,7 +132,7 @@ def train(cfg):
iter_id, lr[0], iter_id, lr[0],
smoothed_loss.get_median_value( smoothed_loss.get_median_value(
), start_time - prev_start_time)) ), start_time - prev_start_time))
#print('cls_loss ', losses[1][0], ' reg_loss ', losses[2][0], ' loss_cls ', losses[3][0], ' loss_bbox ', losses[4][0]) sys.stdout.flush()
if (iter_id + 1) % cfg.snapshot_stride == 0: if (iter_id + 1) % cfg.snapshot_stride == 0:
save_model("model_iter{}".format(iter_id)) save_model("model_iter{}".format(iter_id))
except fluid.core.EOFException: except fluid.core.EOFException:
...@@ -143,7 +158,7 @@ def train(cfg): ...@@ -143,7 +158,7 @@ def train(cfg):
print("Iter {:d}, lr {:.6f}, loss {:.6f}, time {:.5f}".format( print("Iter {:d}, lr {:.6f}, loss {:.6f}, time {:.5f}".format(
iter_id, lr[0], iter_id, lr[0],
smoothed_loss.get_median_value(), start_time - prev_start_time)) smoothed_loss.get_median_value(), start_time - prev_start_time))
#print('cls_loss ', losses[1][0], ' reg_loss ', losses[2][0], ' loss_cls ', losses[3][0], ' loss_bbox ', losses[4][0]) sys.stdout.flush()
if (iter_id + 1) % cfg.snapshot_stride == 0: if (iter_id + 1) % cfg.snapshot_stride == 0:
save_model("model_iter{}".format(iter_id)) save_model("model_iter{}".format(iter_id))
if (iter_id + 1) == cfg.max_iter: if (iter_id + 1) == cfg.max_iter:
......
"""Contains common utility functions."""
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); #Licensed under the Apache License, Version 2.0 (the "License");
...@@ -12,6 +11,9 @@ ...@@ -12,6 +11,9 @@
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and #See the License for the specific language governing permissions and
#limitations under the License. #limitations under the License.
"""
Contains common utility functions.
"""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
...@@ -83,8 +85,7 @@ class SmoothedValue(object): ...@@ -83,8 +85,7 @@ class SmoothedValue(object):
def parse_args(): def parse_args():
""" """return all args
return all args
""" """
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser) add_arg = functools.partial(add_arguments, argparser=parser)
......
...@@ -12,8 +12,12 @@ ...@@ -12,8 +12,12 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys import sys
import os import os
import six
import argparse import argparse
import functools import functools
import matplotlib import matplotlib
...@@ -40,7 +44,9 @@ add_arg('use_gpu', bool, True, "Whether to use GPU to train.") ...@@ -40,7 +44,9 @@ add_arg('use_gpu', bool, True, "Whether to use GPU to train.")
def loss(x, label): def loss(x, label):
return fluid.layers.mean(x * (label - 0.5)) return fluid.layers.mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=x, label=label))
def train(args): def train(args):
...@@ -67,7 +73,10 @@ def train(args): ...@@ -67,7 +73,10 @@ def train(args):
g_program_test = dg_program.clone(for_test=True) g_program_test = dg_program.clone(for_test=True)
dg_logit = D_cond(g_img, conditions) dg_logit = D_cond(g_img, conditions)
dg_loss = loss(dg_logit, 1) dg_loss = loss(
dg_logit,
fluid.layers.fill_constant_batch_size_like(
input=noise, dtype='float32', shape=[-1, 1], value=1.0))
opt = fluid.optimizer.Adam(learning_rate=LEARNING_RATE) opt = fluid.optimizer.Adam(learning_rate=LEARNING_RATE)
...@@ -97,7 +106,7 @@ def train(args): ...@@ -97,7 +106,7 @@ def train(args):
noise_data = np.random.uniform( noise_data = np.random.uniform(
low=-1.0, high=1.0, low=-1.0, high=1.0,
size=[args.batch_size, NOISE_SIZE]).astype('float32') size=[args.batch_size, NOISE_SIZE]).astype('float32')
real_image = np.array(map(lambda x: x[0], data)).reshape( real_image = np.array(list(map(lambda x: x[0], data))).reshape(
-1, 784).astype('float32') -1, 784).astype('float32')
conditions_data = np.array([x[1] for x in data]).reshape( conditions_data = np.array([x[1] for x in data]).reshape(
[-1, 1]).astype("float32") [-1, 1]).astype("float32")
...@@ -133,7 +142,7 @@ def train(args): ...@@ -133,7 +142,7 @@ def train(args):
d_loss_np = [d_loss_1[0][0], d_loss_2[0][0]] d_loss_np = [d_loss_1[0][0], d_loss_2[0][0]]
for _ in xrange(NUM_TRAIN_TIMES_OF_DG): for _ in six.moves.xrange(NUM_TRAIN_TIMES_OF_DG):
noise_data = np.random.uniform( noise_data = np.random.uniform(
low=-1.0, high=1.0, low=-1.0, high=1.0,
size=[args.batch_size, NOISE_SIZE]).astype('float32') size=[args.batch_size, NOISE_SIZE]).astype('float32')
...@@ -154,7 +163,7 @@ def train(args): ...@@ -154,7 +163,7 @@ def train(args):
total_images = np.concatenate([real_image, generated_images]) total_images = np.concatenate([real_image, generated_images])
fig = plot(total_images) fig = plot(total_images)
msg = "Epoch ID={0}\n Batch ID={1}\n D-Loss={2}\n DG-Loss={3}\n gen={4}".format( msg = "Epoch ID={0}\n Batch ID={1}\n D-Loss={2}\n DG-Loss={3}\n gen={4}".format(
pass_id, batch_id, d_loss_np, dg_loss_np, pass_id, batch_id, np.mean(d_loss_np), dg_loss_np,
check(generated_images)) check(generated_images))
print(msg) print(msg)
plt.title(msg) plt.title(msg)
......
...@@ -12,11 +12,15 @@ ...@@ -12,11 +12,15 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys import sys
import os import os
import argparse import argparse
import functools import functools
import matplotlib import matplotlib
import six
import numpy as np import numpy as np
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -32,15 +36,17 @@ LEARNING_RATE = 2e-4 ...@@ -32,15 +36,17 @@ LEARNING_RATE = 2e-4
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser) add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable # yapf: disable
add_arg('batch_size', int, 121, "Minibatch size.") add_arg('batch_size', int, 128, "Minibatch size.")
add_arg('epoch', int, 20, "The number of epoched to be trained.") add_arg('epoch', int, 20, "The number of epoched to be trained.")
add_arg('output', str, "./output", "The directory the model and the test result to be saved to.") add_arg('output', str, "./output_dcgan", "The directory the model and the test result to be saved to.")
add_arg('use_gpu', bool, True, "Whether to use GPU to train.") add_arg('use_gpu', bool, True, "Whether to use GPU to train.")
# yapf: enable # yapf: enable
def loss(x, label): def loss(x, label):
return fluid.layers.mean(x * (label - 0.5)) return fluid.layers.mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=x, label=label))
def train(args): def train(args):
...@@ -63,7 +69,10 @@ def train(args): ...@@ -63,7 +69,10 @@ def train(args):
g_program_test = dg_program.clone(for_test=True) g_program_test = dg_program.clone(for_test=True)
dg_logit = D(g_img) dg_logit = D(g_img)
dg_loss = loss(dg_logit, 1) dg_loss = loss(
dg_logit,
fluid.layers.fill_constant_batch_size_like(
input=noise, dtype='float32', shape=[-1, 1], value=1.0))
opt = fluid.optimizer.Adam(learning_rate=LEARNING_RATE) opt = fluid.optimizer.Adam(learning_rate=LEARNING_RATE)
...@@ -93,7 +102,7 @@ def train(args): ...@@ -93,7 +102,7 @@ def train(args):
noise_data = np.random.uniform( noise_data = np.random.uniform(
low=-1.0, high=1.0, low=-1.0, high=1.0,
size=[args.batch_size, NOISE_SIZE]).astype('float32') size=[args.batch_size, NOISE_SIZE]).astype('float32')
real_image = np.array(map(lambda x: x[0], data)).reshape( real_image = np.array(list(map(lambda x: x[0], data))).reshape(
-1, 784).astype('float32') -1, 784).astype('float32')
real_labels = np.ones( real_labels = np.ones(
shape=[real_image.shape[0], 1], dtype='float32') shape=[real_image.shape[0], 1], dtype='float32')
...@@ -123,7 +132,7 @@ def train(args): ...@@ -123,7 +132,7 @@ def train(args):
d_loss_np = [d_loss_1[0][0], d_loss_2[0][0]] d_loss_np = [d_loss_1[0][0], d_loss_2[0][0]]
for _ in xrange(NUM_TRAIN_TIMES_OF_DG): for _ in six.moves.xrange(NUM_TRAIN_TIMES_OF_DG):
noise_data = np.random.uniform( noise_data = np.random.uniform(
low=-1.0, high=1.0, low=-1.0, high=1.0,
size=[args.batch_size, NOISE_SIZE]).astype('float32') size=[args.batch_size, NOISE_SIZE]).astype('float32')
...@@ -139,9 +148,9 @@ def train(args): ...@@ -139,9 +148,9 @@ def train(args):
fetch_list={g_img})[0] fetch_list={g_img})[0]
total_images = np.concatenate([real_image, generated_images]) total_images = np.concatenate([real_image, generated_images])
fig = plot(total_images) fig = plot(total_images)
msg = "Epoch ID={0}\n Batch ID={1}\n D-Loss={2}\n DG-Loss={3}\n gen={4}".format( msg = "Epoch ID={0} Batch ID={1} D-Loss={2} DG-Loss={3}\n gen={4}".format(
pass_id, batch_id, d_loss_np, dg_loss_np, pass_id, batch_id,
check(generated_images)) np.mean(d_loss_np), dg_loss_np, check(generated_images))
print(msg) print(msg)
plt.title(msg) plt.title(msg)
plt.savefig( plt.savefig(
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from utility import get_parent_function_name from utility import get_parent_function_name
...@@ -104,13 +107,13 @@ def D_cond(image, y): ...@@ -104,13 +107,13 @@ def D_cond(image, y):
def G_cond(z, y): def G_cond(z, y):
s_h, s_w = output_height, output_width s_h, s_w = output_height, output_width
s_h2, s_h4 = int(s_h / 2), int(s_h / 4) s_h2, s_h4 = int(s_h // 2), int(s_h // 4)
s_w2, s_w4 = int(s_w / 2), int(s_w / 4) s_w2, s_w4 = int(s_w // 2), int(s_w // 4)
yb = fluid.layers.reshape(y, [-1, y_dim, 1, 1]) #NCHW yb = fluid.layers.reshape(y, [-1, y_dim, 1, 1]) #NCHW
z = fluid.layers.concat([z, y], 1) z = fluid.layers.concat([z, y], 1)
h0 = bn(fc(z, gfc_dim / 2), act='relu') h0 = bn(fc(z, gfc_dim // 2), act='relu')
h0 = fluid.layers.concat([h0, y], 1) h0 = fluid.layers.concat([h0, y], 1)
h1 = bn(fc(h0, gf_dim * 2 * s_h4 * s_w4), act='relu') h1 = bn(fc(h0, gf_dim * 2 * s_h4 * s_w4), act='relu')
...@@ -134,8 +137,8 @@ def D(x): ...@@ -134,8 +137,8 @@ def D(x):
def G(x): def G(x):
x = bn(fc(x, gfc_dim)) x = bn(fc(x, gfc_dim))
x = bn(fc(x, gf_dim * 2 * img_dim / 4 * img_dim / 4)) x = bn(fc(x, gf_dim * 2 * img_dim // 4 * img_dim // 4))
x = fluid.layers.reshape(x, [-1, gf_dim * 2, img_dim / 4, img_dim / 4]) x = fluid.layers.reshape(x, [-1, gf_dim * 2, img_dim // 4, img_dim // 4])
x = deconv(x, gf_dim * 2, act='relu', output_size=[14, 14]) x = deconv(x, gf_dim * 2, act='relu', output_size=[14, 14])
x = deconv(x, 1, filter_size=5, padding=2, act='tanh', output_size=[28, 28]) x = deconv(x, 1, filter_size=5, padding=2, act='tanh', output_size=[28, 28])
x = fluid.layers.reshape(x, shape=[-1, 28 * 28]) x = fluid.layers.reshape(x, shape=[-1, 28 * 28])
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math import math
import distutils.util import distutils.util
import numpy as np import numpy as np
import inspect import inspect
import matplotlib import matplotlib
import six
matplotlib.use('agg') matplotlib.use('agg')
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec import matplotlib.gridspec as gridspec
...@@ -54,7 +58,7 @@ def print_arguments(args): ...@@ -54,7 +58,7 @@ def print_arguments(args):
:type args: argparse.Namespace :type args: argparse.Namespace
""" """
print("----------- Configuration Arguments -----------") print("----------- Configuration Arguments -----------")
for arg, value in sorted(vars(args).iteritems()): for arg, value in sorted(six.iteritems(vars(args))):
print("%s: %s" % (arg, value)) print("%s: %s" % (arg, value))
print("------------------------------------------------") print("------------------------------------------------")
......
...@@ -21,21 +21,23 @@ TODO ...@@ -21,21 +21,23 @@ TODO
horse2zebra训练集包含1069张野马图片,1336张斑马图片。测试集包含121张野马图片和141张斑马图片。 horse2zebra训练集包含1069张野马图片,1336张斑马图片。测试集包含121张野马图片和141张斑马图片。
数据下载处理完毕后,并组织为以下路径: 数据下载处理完毕后,并组织为以下路径结构
``` ```
horse2zebra/ data
|-- testA |-- horse2zebra
|-- testA.txt | |-- testA
|-- testB | |-- testA.txt
|-- testB.txt | |-- testB
|-- trainA | |-- testB.txt
|-- trainA.txt | |-- trainA
|-- trainB | |-- trainA.txt
`-- trainB.txt | |-- trainB
| `-- trainB.txt
``` ```
以上数据文件中,‘testA’为存放野马测试图片的文件夹,‘testB’为存放斑马测试图片的文件夹,'testA.txt'和'testB.txt'分别为野马和斑马测试图片路径列表文件,格式如下: 以上数据文件中,`data`文件夹需要放在训练脚本`train.py`同级目录下。`testA`为存放野马测试图片的文件夹,`testB`为存放斑马测试图片的文件夹,`testA.txt`和`testB.txt`分别为野马和斑马测试图片路径列表文件,格式如下:
``` ```
testA/n02381460_9243.jpg testA/n02381460_9243.jpg
...@@ -53,7 +55,7 @@ testA/n02381460_9245.jpg ...@@ -53,7 +55,7 @@ testA/n02381460_9245.jpg
在GPU单卡上训练: 在GPU单卡上训练:
``` ```
env CUDA_VISIABLE_DEVICES=0 python train.py env CUDA_VISIBLE_DEVICES=0 python train.py
``` ```
执行`python train.py --help`可查看更多使用方式和参数详细说明。 执行`python train.py --help`可查看更多使用方式和参数详细说明。
...@@ -72,7 +74,7 @@ env CUDA_VISIABLE_DEVICES=0 python train.py ...@@ -72,7 +74,7 @@ env CUDA_VISIABLE_DEVICES=0 python train.py
``` ```
env CUDA_VISIBLE_DEVICES=0 python infer.py \ env CUDA_VISIBLE_DEVICES=0 python infer.py \
--model_path="models/1" --input="./data/inputA/*" \ --init_model="models/1" --input="./data/inputA/*" \
--output="./output" --output="./output"
``` ```
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os import os
from PIL import Image from PIL import Image
import numpy as np import numpy as np
from itertools import izip
A_LIST_FILE = "./data/horse2zebra/trainA.txt" A_LIST_FILE = "./data/horse2zebra/trainA.txt"
B_LIST_FILE = "./data/horse2zebra/trainB.txt" B_LIST_FILE = "./data/horse2zebra/trainB.txt"
...@@ -70,11 +72,3 @@ def b_test_reader(): ...@@ -70,11 +72,3 @@ def b_test_reader():
Reader of images with B style for test. Reader of images with B style for test.
""" """
return reader_creater(B_TEST_LIST_FILE, cycle=False, return_name=True) return reader_creater(B_TEST_LIST_FILE, cycle=False, return_name=True)
if __name__ == "__main__":
for A, B in izip(a_test_reader()(), a_test_reader()()):
print A[0].shape
print A[1]
print B[0].shape
print B[1]
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import data_reader import data_reader
import os import os
import random import random
...@@ -9,7 +12,6 @@ import paddle.fluid as fluid ...@@ -9,7 +12,6 @@ import paddle.fluid as fluid
import numpy as np import numpy as np
from paddle.fluid import core from paddle.fluid import core
from trainer import * from trainer import *
from itertools import izip
from scipy.misc import imsave from scipy.misc import imsave
import paddle.fluid.profiler as profiler import paddle.fluid.profiler as profiler
from utility import add_arguments, print_arguments, ImagePool from utility import add_arguments, print_arguments, ImagePool
...@@ -66,7 +68,7 @@ def train(args): ...@@ -66,7 +68,7 @@ def train(args):
if not os.path.exists(out_path): if not os.path.exists(out_path):
os.makedirs(out_path) os.makedirs(out_path)
i = 0 i = 0
for data_A, data_B in izip(A_test_reader(), B_test_reader()): for data_A, data_B in zip(A_test_reader(), B_test_reader()):
A_name = data_A[1] A_name = data_A[1]
B_name = data_B[1] B_name = data_B[1]
tensor_A = core.LoDTensor() tensor_A = core.LoDTensor()
...@@ -114,7 +116,7 @@ def train(args): ...@@ -114,7 +116,7 @@ def train(args):
exe, out_path + "/d_a", main_program=d_A_trainer.program) exe, out_path + "/d_a", main_program=d_A_trainer.program)
fluid.io.save_persistables( fluid.io.save_persistables(
exe, out_path + "/d_b", main_program=d_B_trainer.program) exe, out_path + "/d_b", main_program=d_B_trainer.program)
print "saved checkpoint to [%s]" % out_path print("saved checkpoint to {}".format(out_path))
sys.stdout.flush() sys.stdout.flush()
def init_model(): def init_model():
...@@ -128,7 +130,7 @@ def train(args): ...@@ -128,7 +130,7 @@ def train(args):
exe, args.init_model + "/d_a", main_program=d_A_trainer.program) exe, args.init_model + "/d_a", main_program=d_A_trainer.program)
fluid.io.load_persistables( fluid.io.load_persistables(
exe, args.init_model + "/d_b", main_program=d_B_trainer.program) exe, args.init_model + "/d_b", main_program=d_B_trainer.program)
print "Load model from [%s]" % args.init_model print("Load model from {}".format(args.init_model))
if args.init_model: if args.init_model:
init_model() init_model()
...@@ -136,8 +138,8 @@ def train(args): ...@@ -136,8 +138,8 @@ def train(args):
for epoch in range(args.epoch): for epoch in range(args.epoch):
batch_id = 0 batch_id = 0
for i in range(max_images_num): for i in range(max_images_num):
data_A = A_reader.next() data_A = next(A_reader)
data_B = B_reader.next() data_B = next(B_reader)
tensor_A = core.LoDTensor() tensor_A = core.LoDTensor()
tensor_B = core.LoDTensor() tensor_B = core.LoDTensor()
tensor_A.set(data_A, place) tensor_A.set(data_A, place)
...@@ -174,9 +176,9 @@ def train(args): ...@@ -174,9 +176,9 @@ def train(args):
feed={"input_A": tensor_A, feed={"input_A": tensor_A,
"fake_pool_A": fake_pool_A}) "fake_pool_A": fake_pool_A})
print "epoch[%d]; batch[%d]; g_A_loss: %s; d_B_loss: %s; g_B_loss: %s; d_A_loss: %s;" % ( print("epoch{}; batch{}; g_A_loss: {}; d_B_loss: {}; g_B_loss: {}; d_A_loss: {};".format(
epoch, batch_id, g_A_loss[0], d_B_loss[0], g_B_loss[0], epoch, batch_id, g_A_loss[0], d_B_loss[0], g_B_loss[0],
d_A_loss[0]) d_A_loss[0]))
sys.stdout.flush() sys.stdout.flush()
batch_id += 1 batch_id += 1
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from model import * from model import *
import paddle.fluid as fluid import paddle.fluid as fluid
......
...@@ -17,6 +17,7 @@ from __future__ import absolute_import ...@@ -17,6 +17,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import distutils.util import distutils.util
import six
import random import random
import glob import glob
import numpy as np import numpy as np
...@@ -39,7 +40,7 @@ def print_arguments(args): ...@@ -39,7 +40,7 @@ def print_arguments(args):
:type args: argparse.Namespace :type args: argparse.Namespace
""" """
print("----------- Configuration Arguments -----------") print("----------- Configuration Arguments -----------")
for arg, value in sorted(vars(args).iteritems()): for arg, value in sorted(six.iteritems(vars(args))):
print("%s: %s" % (arg, value)) print("%s: %s" % (arg, value))
print("------------------------------------------------") print("------------------------------------------------")
......
...@@ -8,7 +8,7 @@ import os ...@@ -8,7 +8,7 @@ import os
import cv2 import cv2
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.v2 as paddle import paddle
from icnet import icnet from icnet import icnet
from utils import add_arguments, print_arguments, get_feeder_data from utils import add_arguments, print_arguments, get_feeder_data
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
...@@ -111,10 +111,10 @@ def infer(args): ...@@ -111,10 +111,10 @@ def infer(args):
for line in open(args.images_list): for line in open(args.images_list):
image_file = args.images_path + "/" + line.strip() image_file = args.images_path + "/" + line.strip()
filename = os.path.basename(image_file) filename = os.path.basename(image_file)
image = paddle.image.load_image( image = paddle.dataset.image.load_image(
image_file, is_color=True).astype("float32") image_file, is_color=True).astype("float32")
image -= IMG_MEAN image -= IMG_MEAN
img = paddle.image.to_chw(image)[np.newaxis, :] img = paddle.dataset.image.to_chw(image)[np.newaxis, :]
image_t = fluid.core.LoDTensor() image_t = fluid.core.LoDTensor()
image_t.set(img, place) image_t.set(img, place)
result = exe.run(inference_program, result = exe.run(inference_program,
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
## 安装 ## 安装
在当前目录下运行样例代码需要PaddlePaddle Fluid的v0.13.0或以上的版本。如果你的运行环境中的PaddlePaddle低于此版本,请根据[安装文档](http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/pip_install_cn.html)中的说明来更新PaddlePaddle。 在当前目录下运行样例代码需要PaddlePaddle Fluid的v0.13.0或以上的版本。如果你的运行环境中的PaddlePaddle低于此版本,请根据安装文档中的说明来更新PaddlePaddle。
## 数据准备 ## 数据准备
......
...@@ -8,7 +8,7 @@ import sys ...@@ -8,7 +8,7 @@ import sys
import os import os
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.v2 as paddle import paddle
def test_model(exe, test_program, fetch_list, test_reader, feeder): def test_model(exe, test_program, fetch_list, test_reader, feeder):
......
...@@ -52,7 +52,7 @@ In this example, we launched 4 parameter server instances and 4 trainer instance ...@@ -52,7 +52,7 @@ In this example, we launched 4 parameter server instances and 4 trainer instance
1. launch trainer process 1. launch trainer process
``` python ``` python
PADDLE_TRAINING_ROLE=PSERVER \ PADDLE_TRAINING_ROLE=TRAINER \
PADDLE_TRAINERS=4 \ PADDLE_TRAINERS=4 \
PADDLE_PSERVER_IPS=192.168.0.100,192.168.0.101,192.168.0.102,192.168.0.103 \ PADDLE_PSERVER_IPS=192.168.0.100,192.168.0.101,192.168.0.102,192.168.0.103 \
PADDLE_TRAINER_ID=0 \ PADDLE_TRAINER_ID=0 \
...@@ -110,4 +110,4 @@ Training acc1 curves ...@@ -110,4 +110,4 @@ Training acc1 curves
### Performance ### Performance
TBD TBD
\ No newline at end of file
...@@ -22,6 +22,7 @@ import numpy as np ...@@ -22,6 +22,7 @@ import numpy as np
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
import six
import sys import sys
sys.path.append("..") sys.path.append("..")
import models import models
...@@ -172,7 +173,7 @@ def dist_transpile(trainer_id, args, train_prog, startup_prog): ...@@ -172,7 +173,7 @@ def dist_transpile(trainer_id, args, train_prog, startup_prog):
def test_parallel(exe, test_args, args, test_prog, feeder): def test_parallel(exe, test_args, args, test_prog, feeder):
acc_evaluators = [] acc_evaluators = []
for i in xrange(len(test_args[2])): for i in six.moves.xrange(len(test_args[2])):
acc_evaluators.append(fluid.metrics.Accuracy()) acc_evaluators.append(fluid.metrics.Accuracy())
to_fetch = [v.name for v in test_args[2]] to_fetch = [v.name for v in test_args[2]]
...@@ -291,7 +292,7 @@ def train_parallel(train_args, test_args, args, train_prog, test_prog, ...@@ -291,7 +292,7 @@ def train_parallel(train_args, test_args, args, train_prog, test_prog,
def print_arguments(args): def print_arguments(args):
print('----------- Configuration Arguments -----------') print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()): for arg, value in sorted(six.iteritems(vars(args))):
print('%s: %s' % (arg, value)) print('%s: %s' % (arg, value))
print('------------------------------------------------') print('------------------------------------------------')
...@@ -307,7 +308,7 @@ def print_paddle_envs(): ...@@ -307,7 +308,7 @@ def print_paddle_envs():
print('----------- Configuration envs -----------') print('----------- Configuration envs -----------')
for k in os.environ: for k in os.environ:
if "PADDLE_" in k: if "PADDLE_" in k:
print "ENV %s:%s" % (k, os.environ[k]) print("ENV %s:%s" % (k, os.environ[k]))
print('------------------------------------------------') print('------------------------------------------------')
......
...@@ -140,7 +140,7 @@ def _reader_creator(file_list, ...@@ -140,7 +140,7 @@ def _reader_creator(file_list,
# distributed mode if the env var `PADDLE_TRAINING_ROLE` exists # distributed mode if the env var `PADDLE_TRAINING_ROLE` exists
trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0")) trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
trainer_count = int(os.getenv("PADDLE_TRAINERS", "1")) trainer_count = int(os.getenv("PADDLE_TRAINERS", "1"))
per_node_lines = len(full_lines) / trainer_count per_node_lines = len(full_lines) // trainer_count
lines = full_lines[trainer_id * per_node_lines:(trainer_id + 1) lines = full_lines[trainer_id * per_node_lines:(trainer_id + 1)
* per_node_lines] * per_node_lines]
print( print(
......
...@@ -33,7 +33,7 @@ add_arg('lr', float, 0.1, "set learning rate.") ...@@ -33,7 +33,7 @@ add_arg('lr', float, 0.1, "set learning rate.")
add_arg('lr_strategy', str, "piecewise_decay", "Set the learning rate decay strategy.") add_arg('lr_strategy', str, "piecewise_decay", "Set the learning rate decay strategy.")
add_arg('model', str, "SE_ResNeXt50_32x4d", "Set the network to use.") add_arg('model', str, "SE_ResNeXt50_32x4d", "Set the network to use.")
add_arg('enable_ce', bool, False, "If set True, enable continuous evaluation job.") add_arg('enable_ce', bool, False, "If set True, enable continuous evaluation job.")
add_arg('data_dir' str, "./data/ILSVRC2012", "The ImageNet dataset root dir.") add_arg('data_dir', str, "./data/ILSVRC2012", "The ImageNet dataset root dir.")
# yapf: enable # yapf: enable
model_list = [m for m in dir(models) if "__" not in m] model_list = [m for m in dir(models) if "__" not in m]
......
...@@ -4,7 +4,6 @@ import random ...@@ -4,7 +4,6 @@ import random
import cPickle import cPickle
import functools import functools
import numpy as np import numpy as np
#import paddle.v2 as paddle
import paddle import paddle
from PIL import Image, ImageEnhance from PIL import Image, ImageEnhance
......
#!/bin/bash #!/bin/bash
DATA_PATH=$HOME/.cache/paddle/dataset/wmt16 DATA_PATH=$HOME/.cache/paddle/dataset/wmt16
if [ ! -d $DATA_PATH/en_10000.dict ] ; then if [ ! -e $DATA_PATH/en_10000.dict ] ; then
python -c 'import paddle;paddle.dataset.wmt16.train(10000, 10000, "en")().next()' python -c 'import paddle;paddle.dataset.wmt16.train(10000, 10000, "en")().next()'
tar -zxf $DATA_PATH/wmt16.tar.gz -C $DATA_PATH tar -zxf $DATA_PATH/wmt16.tar.gz -C $DATA_PATH
fi fi
......
...@@ -63,7 +63,7 @@ WMT 数据集是机器翻译领域公认的主流数据集;WMT 英德和英法 ...@@ -63,7 +63,7 @@ WMT 数据集是机器翻译领域公认的主流数据集;WMT 英德和英法
#### WMT 英德翻译数据 #### WMT 英德翻译数据
[WMT'16 EN-DE 数据集](http://www.statmt.org/wmt16/translation-task.html)是一个中等规模的数据集。参照论文,英德数据集我们使用 BPE 编码的数据,这能够更好的解决未登录词(out-of-vocabulary,OOV)的问题[4]。用到的 BPE 数据可以参照[这里](https://github.com/google/seq2seq/blob/master/docs/data.md)进行下载(如果希望在自定义数据中使用 BPE 编码,可以参照[这里](https://github.com/rsennrich/subword-nmt)进行预处理),下载后解压,其中 `train.tok.clean.bpe.32000.en` 和 `train.tok.clean.bpe.32000.de` 为使用 BPE 的训练数据(平行语料,分别对应了英语和德语,经过了 tokenize 和 BPE 的处理),`newstest2013.tok.bpe.32000.en` 和 `newstest2013.tok.bpe.32000.de` 等为测试数据(`newstest2013.tok.en` 和 `newstest2013.tok.de` 等则为对应的未使用 BPE 的测试数据),`vocab.bpe.32000` 为相应的词典文件(源语言和目标语言共享该词典文件)。 [WMT'16 EN-DE 数据集](http://www.statmt.org/wmt16/translation-task.html)是一个中等规模的数据集。参照论文,英德数据集我们使用 BPE 编码的数据,这能够更好的解决未登录词(out-of-vocabulary,OOV)的问题[4]。用到的 BPE 数据可以参照[这里](https://github.com/google/seq2seq/blob/master/docs/data.md)进行下载(如果希望在自定义数据中使用 BPE 编码,可以参照[这里](https://github.com/rsennrich/subword-nmt)进行预处理),下载后解压,其中 `train.tok.clean.bpe.32000.en` 和 `train.tok.clean.bpe.32000.de` 为使用 BPE 的训练数据(平行语料,分别对应了英语和德语,经过了 tokenize 和 BPE 的处理),`newstest2016.tok.bpe.32000.en` 和 `newstest2016.tok.bpe.32000.de` 等为测试数据(`newstest2016.tok.en` 和 `newstest2016.tok.de` 等则为对应的未使用 BPE 的测试数据),`vocab.bpe.32000` 为相应的词典文件(源语言和目标语言共享该词典文件)。
由于本示例中的数据读取脚本 `reader.py` 默认使用的样本数据的格式为 `\t` 分隔的源语言和目标语言句子对(默认句子中的词之间使用空格分隔),因此需要将源语言到目标语言的平行语料库文件合并为一个文件,可以执行以下命令进行合并: 由于本示例中的数据读取脚本 `reader.py` 默认使用的样本数据的格式为 `\t` 分隔的源语言和目标语言句子对(默认句子中的词之间使用空格分隔),因此需要将源语言到目标语言的平行语料库文件合并为一个文件,可以执行以下命令进行合并:
```sh ```sh
...@@ -91,7 +91,7 @@ python -u train.py \ ...@@ -91,7 +91,7 @@ python -u train.py \
--train_file_pattern data/train.tok.clean.bpe.32000.en-de \ --train_file_pattern data/train.tok.clean.bpe.32000.en-de \
--token_delimiter ' ' \ --token_delimiter ' ' \
--use_token_batch True \ --use_token_batch True \
--batch_size 3200 \ --batch_size 4096 \
--sort_type pool \ --sort_type pool \
--pool_size 200000 --pool_size 200000
``` ```
...@@ -100,7 +100,7 @@ python -u train.py \ ...@@ -100,7 +100,7 @@ python -u train.py \
python train.py --help python train.py --help
``` ```
更多模型训练相关的参数则在 `config.py` 中的 `ModelHyperParams` 和 `TrainTaskConfig` 内定义;`ModelHyperParams` 定义了 embedding 维度等模型超参数,`TrainTaskConfig` 定义了 warmup 步数等训练需要的参数。这些参数默认使用了 Transformer 论文中 base model 的配置,如需调整可以在该脚本中进行修改。另外这些参数同样可在执行训练脚本的命令行中设置,传入的配置会合并并覆盖 `config.py` 中的配置,如可以通过以下命令来训练 Transformer 论文中的 big model : 更多模型训练相关的参数则在 `config.py` 中的 `ModelHyperParams` 和 `TrainTaskConfig` 内定义;`ModelHyperParams` 定义了 embedding 维度等模型超参数,`TrainTaskConfig` 定义了 warmup 步数等训练需要的参数。这些参数默认使用了 Transformer 论文中 base model 的配置,如需调整可以在该脚本中进行修改。另外这些参数同样可在执行训练脚本的命令行中设置,传入的配置会合并并覆盖 `config.py` 中的配置,如可以通过以下命令来训练 Transformer 论文中的 big model (如显存不够可适当减小 batch size 的值)
```sh ```sh
python -u train.py \ python -u train.py \
...@@ -117,22 +117,23 @@ python -u train.py \ ...@@ -117,22 +117,23 @@ python -u train.py \
n_head 16 \ n_head 16 \
d_model 1024 \ d_model 1024 \
d_inner_hid 4096 \ d_inner_hid 4096 \
dropout 0.3 n_head 16 \
prepostprocess_dropout 0.3
``` ```
有关这些参数更详细的信息请参考 `config.py` 中的注释说明。对于英法翻译数据,执行训练和英德翻译训练类似,修改命令中的词典和数据文件为英法数据相应文件的路径,另外要注意的是由于英法翻译数据 token 间不是使用空格进行分隔,需要修改 `token_delimiter` 参数的设置为 `--token_delimiter '\x01'`。 有关这些参数更详细的信息请参考 `config.py` 中的注释说明。对于英法翻译数据,执行训练和英德翻译训练类似,修改命令中的词典和数据文件为英法数据相应文件的路径,另外要注意的是由于英法翻译数据 token 间不是使用空格进行分隔,需要修改 `token_delimiter` 参数的设置为 `--token_delimiter '\x01'`。
训练时默认使用所有 GPU,可以通过 `CUDA_VISIBLE_DEVICES` 环境变量来设置使用的 GPU 数目。也可以只使用 CPU 训练(通过参数 `--device CPU` 设置),训练速度相对较慢。在训练过程中,每个 epoch 结束后将保存模型到参数 `model_dir` 指定的目录,每个 epoch 内也会每隔1000个 iteration 进行一次保存,每个 iteration 将打印如下的日志到标准输出: 训练时默认使用所有 GPU,可以通过 `CUDA_VISIBLE_DEVICES` 环境变量来设置使用的 GPU 数目。也可以只使用 CPU 训练(通过参数 `--device CPU` 设置),训练速度相对较慢。在训练过程中,每隔一定 iteration 后(通过参数 `save_freq` 设置,默认为10000)保存模型到参数 `model_dir` 指定的目录,每个 epoch 结束后也会保存 checkpoint 到 `ckpt_dir` 指定的目录,每个 iteration 将打印如下的日志到标准输出:
```txt ```txt
epoch: 0, batch: 0, sum loss: 258793.343750, avg loss: 11.069005, ppl: 64151.644531 step_idx: 0, epoch: 0, batch: 0, avg loss: 11.059394, normalized loss: 9.682427, ppl: 63538.027344
epoch: 0, batch: 1, sum loss: 256140.718750, avg loss: 11.059616, ppl: 63552.148438 step_idx: 1, epoch: 0, batch: 1, avg loss: 11.053112, normalized loss: 9.676146, ppl: 63140.144531
epoch: 0, batch: 2, sum loss: 258931.093750, avg loss: 11.064013, ppl: 63832.167969 step_idx: 2, epoch: 0, batch: 2, avg loss: 11.054576, normalized loss: 9.677609, ppl: 63232.640625
epoch: 0, batch: 3, sum loss: 256837.875000, avg loss: 11.058206, ppl: 63462.574219 step_idx: 3, epoch: 0, batch: 3, avg loss: 11.046638, normalized loss: 9.669671, ppl: 62732.664062
epoch: 0, batch: 4, sum loss: 256461.000000, avg loss: 11.053401, ppl: 63158.390625 step_idx: 4, epoch: 0, batch: 4, avg loss: 11.030095, normalized loss: 9.653129, ppl: 61703.449219
epoch: 0, batch: 5, sum loss: 257064.562500, avg loss: 11.019099, ppl: 61028.683594 step_idx: 5, epoch: 0, batch: 5, avg loss: 11.047491, normalized loss: 9.670525, ppl: 62786.230469
epoch: 0, batch: 6, sum loss: 256180.125000, avg loss: 11.008556, ppl: 60388.644531 step_idx: 6, epoch: 0, batch: 6, avg loss: 11.044509, normalized loss: 9.667542, ppl: 62599.273438
epoch: 0, batch: 7, sum loss: 256619.671875, avg loss: 11.007106, ppl: 60301.113281 step_idx: 7, epoch: 0, batch: 7, avg loss: 11.011090, normalized loss: 9.634124, ppl: 60541.859375
epoch: 0, batch: 8, sum loss: 255716.734375, avg loss: 10.966025, ppl: 57874.105469 step_idx: 8, epoch: 0, batch: 8, avg loss: 10.985243, normalized loss: 9.608276, ppl: 58997.058594
epoch: 0, batch: 9, sum loss: 245157.500000, avg loss: 10.966562, ppl: 57905.187500 step_idx: 9, epoch: 0, batch: 9, avg loss: 10.993434, normalized loss: 9.616467, ppl: 59482.292969
``` ```
### 模型预测 ### 模型预测
...@@ -143,19 +144,19 @@ python -u infer.py \ ...@@ -143,19 +144,19 @@ python -u infer.py \
--src_vocab_fpath data/vocab.bpe.32000 \ --src_vocab_fpath data/vocab.bpe.32000 \
--trg_vocab_fpath data/vocab.bpe.32000 \ --trg_vocab_fpath data/vocab.bpe.32000 \
--special_token '<s>' '<e>' '<unk>' \ --special_token '<s>' '<e>' '<unk>' \
--test_file_pattern data/newstest2013.tok.bpe.32000.en-de \ --test_file_pattern data/newstest2016.tok.bpe.32000.en-de \
--use_wordpiece False \ --use_wordpiece False \
--token_delimiter ' ' \ --token_delimiter ' ' \
--batch_size 4 \ --batch_size 32 \
model_path trained_models/pass_20.infer.model \ model_path trained_models/iter_199999.infer.model \
beam_size 5 \ beam_size 4 \
max_out_len 256 max_out_len 255
``` ```
和模型训练时类似,预测时也需要设置数据和 reader 相关的参数,并可以执行 `python infer.py --help` 查看这些参数的说明(部分参数意义和训练时略有不同);同样可以在预测命令中设置模型超参数,但应与模型训练时的设置一致;此外相比于模型训练,预测时还有一些额外的参数,如需要设置 `model_path` 来给出模型所在目录,可以设置 `beam_size` 和 `max_out_len` 来指定 Beam Search 算法的搜索宽度和最大深度(翻译长度),这些参数也可以在 `config.py` 中的 `InferTaskConfig` 内查阅注释说明并进行更改设置。 和模型训练时类似,预测时也需要设置数据和 reader 相关的参数,并可以执行 `python infer.py --help` 查看这些参数的说明(部分参数意义和训练时略有不同);同样可以在预测命令中设置模型超参数,但应与模型训练时的设置一致;此外相比于模型训练,预测时还有一些额外的参数,如需要设置 `model_path` 来给出模型所在目录,可以设置 `beam_size` 和 `max_out_len` 来指定 Beam Search 算法的搜索宽度和最大深度(翻译长度),这些参数也可以在 `config.py` 中的 `InferTaskConfig` 内查阅注释说明并进行更改设置。
执行以上预测命令会打印翻译结果到标准输出,每行输出是对应行输入的得分最高的翻译。对于使用 BPE 的英德数据,预测出的翻译结果也将是 BPE 表示的数据,要还原成原始的数据(这里指 tokenize 后的数据)才能进行正确的评估,可以使用以下命令来恢复 `predict.txt` 内的翻译结果到 `predict.tok.txt` 中(无需再次 tokenize 处理): 执行以上预测命令会打印翻译结果到标准输出,每行输出是对应行输入的得分最高的翻译。对于使用 BPE 的英德数据,预测出的翻译结果也将是 BPE 表示的数据,要还原成原始的数据(这里指 tokenize 后的数据)才能进行正确的评估,可以使用以下命令来恢复 `predict.txt` 内的翻译结果到 `predict.tok.txt` 中(无需再次 tokenize 处理):
```sh ```sh
sed 's/@@ //g' predict.txt > predict.tok.txt sed -r 's/(@@ )|(@@ ?$)//g' predict.txt > predict.tok.txt
``` ```
对于英法翻译的 wordpiece 数据,执行预测和英德翻译预测类似,修改命令中的词典和数据文件为英法数据相应文件的路径,另外需要注意修改 `token_delimiter` 参数的设置为 `--token_delimiter '\x01'`;同时要修改 `use_wordpiece` 参数的设置为 `--use_wordpiece True`,这会在预测时将翻译得到的 wordpiece 数据还原为原始数据输出。为了使用 tokenize 的数据进行评估,还需要对翻译结果进行 tokenize 的处理,[Moses](https://github.com/moses-smt/mosesdecoder) 提供了一系列机器翻译相关的脚本。执行 `git clone https://github.com/moses-smt/mosesdecoder.git` 克隆 mosesdecoder 仓库后,可以使用其中的 `tokenizer.perl` 脚本对 `predict.txt` 内的翻译结果进行 tokenize 处理并输出到 `predict.tok.txt` 中,如下: 对于英法翻译的 wordpiece 数据,执行预测和英德翻译预测类似,修改命令中的词典和数据文件为英法数据相应文件的路径,另外需要注意修改 `token_delimiter` 参数的设置为 `--token_delimiter '\x01'`;同时要修改 `use_wordpiece` 参数的设置为 `--use_wordpiece True`,这会在预测时将翻译得到的 wordpiece 数据还原为原始数据输出。为了使用 tokenize 的数据进行评估,还需要对翻译结果进行 tokenize 的处理,[Moses](https://github.com/moses-smt/mosesdecoder) 提供了一系列机器翻译相关的脚本。执行 `git clone https://github.com/moses-smt/mosesdecoder.git` 克隆 mosesdecoder 仓库后,可以使用其中的 `tokenizer.perl` 脚本对 `predict.txt` 内的翻译结果进行 tokenize 处理并输出到 `predict.tok.txt` 中,如下:
...@@ -163,15 +164,21 @@ sed 's/@@ //g' predict.txt > predict.tok.txt ...@@ -163,15 +164,21 @@ sed 's/@@ //g' predict.txt > predict.tok.txt
perl mosesdecoder/scripts/tokenizer/tokenizer.perl -l fr < predict.txt > predict.tok.txt perl mosesdecoder/scripts/tokenizer/tokenizer.perl -l fr < predict.txt > predict.tok.txt
``` ```
接下来就可以使用参考翻译对翻译结果进行 BLEU 指标的评估了。计算 BLEU 值的脚本也在 Moses 中包含,以英德翻译 `newstest2013.tok.de` 数据为例,执行如下命令: 接下来就可以使用参考翻译对翻译结果进行 BLEU 指标的评估了。计算 BLEU 值的脚本也在 Moses 中包含,以英德翻译 `newstest2016.tok.de` 数据为例,执行如下命令:
```sh ```sh
perl mosesdecoder/scripts/generic/multi-bleu.perl data/newstest2013.tok.de < predict.tok.txt perl mosesdecoder/scripts/generic/multi-bleu.perl data/newstest2016.tok.de < predict.tok.txt
``` ```
可以看到类似如下的结果。 可以看到类似如下的结果(为单机两卡训练 200K 个 iteration 后模型的预测结果)。
``` ```
BLEU = 25.08, 58.3/31.5/19.6/12.6 (BP=0.966, ratio=0.967, hyp_len=61321, ref_len=63412) BLEU = 33.08, 64.2/39.2/26.4/18.5 (BP=0.994, ratio=0.994, hyp_len=61971, ref_len=62362)
``` ```
目前在未使用 model average 的情况下,使用默认配置单机八卡(同论文中 base model 的配置)进行训练,英德翻译在 `newstest2013` 上测试 BLEU 值为25.,在 `newstest2014` 上测试 BLEU 值为26.;英法翻译在 `newstest2014` 上测试 BLEU 值为36.。 目前在未使用 model average 的情况下,英德翻译 base model 八卡训练 100K 个 iteration 后测试 BLEU 值如下:
| 测试集 | newstest2013 | newstest2014 | newstest2015 | newstest2016 |
|-|-|-|-|-|
| BLEU | 25.27 | 26.05 | 28.75 | 33.27 |
英法翻译 base model 八卡训练 100K 个 iteration 后在 `newstest2014` 上测试 BLEU 值为36.。
### 分布式训练 ### 分布式训练
......
...@@ -9,12 +9,12 @@ class TrainTaskConfig(object): ...@@ -9,12 +9,12 @@ class TrainTaskConfig(object):
# the hyper parameters for Adam optimizer. # the hyper parameters for Adam optimizer.
# This static learning_rate will be multiplied to the LearningRateScheduler # This static learning_rate will be multiplied to the LearningRateScheduler
# derived learning rate to get the final learning rate. # derived learning rate to get the final learning rate.
learning_rate = 1 learning_rate = 2.0
beta1 = 0.9 beta1 = 0.9
beta2 = 0.98 beta2 = 0.997
eps = 1e-9 eps = 1e-9
# the parameters for learning rate scheduling. # the parameters for learning rate scheduling.
warmup_steps = 4000 warmup_steps = 8000
# the weight used to mix up the ground-truth distribution and the fixed # the weight used to mix up the ground-truth distribution and the fixed
# uniform distribution in label smoothing when training. # uniform distribution in label smoothing when training.
# Set this as zero if label smoothing is not wanted. # Set this as zero if label smoothing is not wanted.
...@@ -30,6 +30,8 @@ class TrainTaskConfig(object): ...@@ -30,6 +30,8 @@ class TrainTaskConfig(object):
# It should be provided if use checkpoints, since the checkpoint doesn't # It should be provided if use checkpoints, since the checkpoint doesn't
# include the training step counter currently. # include the training step counter currently.
start_step = 0 start_step = 0
# the frequency to save trained models.
save_freq = 10000
class InferTaskConfig(object): class InferTaskConfig(object):
...@@ -63,7 +65,6 @@ class ModelHyperParams(object): ...@@ -63,7 +65,6 @@ class ModelHyperParams(object):
# index for <unk> token # index for <unk> token
unk_idx = 2 unk_idx = 2
# max length of sequences deciding the size of position encoding table. # max length of sequences deciding the size of position encoding table.
# Start from 1 and count start and end tokens in.
max_length = 256 max_length = 256
# the dimension for word embeddings, which is also the last dimension of # the dimension for word embeddings, which is also the last dimension of
# the input and output of multi-head attention, position-wise feed-forward # the input and output of multi-head attention, position-wise feed-forward
...@@ -79,8 +80,14 @@ class ModelHyperParams(object): ...@@ -79,8 +80,14 @@ class ModelHyperParams(object):
n_head = 8 n_head = 8
# number of sub-layers to be stacked in the encoder and decoder. # number of sub-layers to be stacked in the encoder and decoder.
n_layer = 6 n_layer = 6
# dropout rate used by all dropout layers. # dropout rates of different modules.
dropout = 0.1 prepostprocess_dropout = 0.1
attention_dropout = 0.1
relu_dropout = 0.1
# to process before each sub-layer
preprocess_cmd = "n" # layer normalization
# to process after each sub-layer
postprocess_cmd = "da" # dropout + residual connection
# random seed used in dropout for CE. # random seed used in dropout for CE.
dropout_seed = None dropout_seed = None
# the flag indicating whether to share embedding and softmax weights. # the flag indicating whether to share embedding and softmax weights.
......
...@@ -156,7 +156,9 @@ def fast_infer(test_data, trg_idx2word, use_wordpiece): ...@@ -156,7 +156,9 @@ def fast_infer(test_data, trg_idx2word, use_wordpiece):
ModelHyperParams.max_length + 1, ModelHyperParams.n_layer, ModelHyperParams.max_length + 1, ModelHyperParams.n_layer,
ModelHyperParams.n_head, ModelHyperParams.d_key, ModelHyperParams.n_head, ModelHyperParams.d_key,
ModelHyperParams.d_value, ModelHyperParams.d_model, ModelHyperParams.d_value, ModelHyperParams.d_model,
ModelHyperParams.d_inner_hid, ModelHyperParams.dropout, ModelHyperParams.d_inner_hid, ModelHyperParams.prepostprocess_dropout,
ModelHyperParams.attention_dropout, ModelHyperParams.relu_dropout,
ModelHyperParams.preprocess_cmd, ModelHyperParams.postprocess_cmd,
ModelHyperParams.weight_sharing, InferTaskConfig.beam_size, ModelHyperParams.weight_sharing, InferTaskConfig.beam_size,
InferTaskConfig.max_out_len, ModelHyperParams.eos_idx) InferTaskConfig.max_out_len, ModelHyperParams.eos_idx)
...@@ -169,7 +171,7 @@ def fast_infer(test_data, trg_idx2word, use_wordpiece): ...@@ -169,7 +171,7 @@ def fast_infer(test_data, trg_idx2word, use_wordpiece):
]) ])
# This is used here to set dropout to the test mode. # This is used here to set dropout to the test mode.
infer_program = fluid.default_main_program().inference_optimize() infer_program = fluid.default_main_program().clone(for_test=True)
for batch_id, data in enumerate(test_data.batch_generator()): for batch_id, data in enumerate(test_data.batch_generator()):
data_input = prepare_batch_input( data_input = prepare_batch_input(
......
import os
import time
import argparse import argparse
import ast import ast
import numpy as np
import multiprocessing import multiprocessing
import os
import six
import time
import paddle import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.profiler as profiler import paddle.fluid.profiler as profiler
from train import split_data, read_multiple, prepare_batch_input
from model import transformer, position_encoding_init
from optim import LearningRateScheduler
from config import *
import reader import reader
from config import *
from train import pad_batch_data, prepare_data_generator, \
prepare_feed_dict_list, py_reader_provider_wrapper
from model import transformer, position_encoding_init
def parse_args(): def parse_args():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser("Training for Transformer.")
"Profile the training process for Transformer.")
parser.add_argument( parser.add_argument(
"--src_vocab_fpath", "--src_vocab_fpath",
type=str, type=str,
...@@ -43,38 +42,70 @@ def parse_args(): ...@@ -43,38 +42,70 @@ def parse_args():
parser.add_argument( parser.add_argument(
"--batch_size", "--batch_size",
type=int, type=int,
default=2048, default=4096,
help="The number of sequences contained in a mini-batch, or the maximum " help="The number of sequences contained in a mini-batch, or the maximum "
"number of tokens (include paddings) contained in a mini-batch. Note " "number of tokens (include paddings) contained in a mini-batch. Note "
"that this represents the number on single device and the actual batch " "that this represents the number on single device and the actual batch "
"size for multi-devices will multiply the device number.") "size for multi-devices will multiply the device number.")
parser.add_argument(
"--num_iters",
type=int,
default=10,
help="The maximum number of iterations profiling over.")
parser.add_argument( parser.add_argument(
"--pool_size", "--pool_size",
type=int, type=int,
default=10000, default=200000,
help="The buffer size to pool data.") help="The buffer size to pool data.")
parser.add_argument(
"--sort_type",
default="pool",
choices=("global", "pool", "none"),
help="The grain to sort by length: global for all instances; pool for "
"instances in pool; none for no sort.")
parser.add_argument(
"--shuffle",
type=ast.literal_eval,
default=True,
help="The flag indicating whether to shuffle instances in each pass.")
parser.add_argument(
"--shuffle_batch",
type=ast.literal_eval,
default=True,
help="The flag indicating whether to shuffle the data batches.")
parser.add_argument( parser.add_argument(
"--special_token", "--special_token",
type=str, type=str,
default=["<s>", "<e>", "<unk>"], default=["<s>", "<e>", "<unk>"],
nargs=3, nargs=3,
help="The <bos>, <eos> and <unk> tokens in the dictionary.") help="The <bos>, <eos> and <unk> tokens in the dictionary.")
parser.add_argument(
"--token_delimiter",
type=lambda x: str(x.encode().decode("unicode-escape")),
default=" ",
help="The delimiter used to split tokens in source or target sentences. "
"For EN-DE BPE data we provided, use spaces as token delimiter. "
"For EN-FR wordpiece data we provided, use '\x01' as token delimiter.")
parser.add_argument(
"--use_mem_opt",
type=ast.literal_eval,
default=True,
help="The flag indicating whether to use memory optimization.")
parser.add_argument(
"--use_py_reader",
type=ast.literal_eval,
default=True,
help="The flag indicating whether to use py_reader.")
parser.add_argument(
"--iter_num",
type=int,
default=20,
help="The iteration number to run in profiling.")
parser.add_argument(
"--use_parallel_exe",
type=bool,
default=False,
help="The flag indicating whether to use ParallelExecutor.")
parser.add_argument( parser.add_argument(
'opts', 'opts',
help='See config.py for all options', help='See config.py for all options',
default=None, default=None,
nargs=argparse.REMAINDER) nargs=argparse.REMAINDER)
parser.add_argument(
'--device',
type=str,
default='GPU',
choices=['CPU', 'GPU'],
help="The device type.")
args = parser.parse_args() args = parser.parse_args()
# Append args related to dict # Append args related to dict
...@@ -91,153 +122,147 @@ def parse_args(): ...@@ -91,153 +122,147 @@ def parse_args():
return args return args
def train_loop(exe, train_progm, init, num_iters, train_data, dev_count, def main(args):
sum_cost, avg_cost, lr_scheduler, token_num, predict): train_prog = fluid.Program()
startup_prog = fluid.Program()
data_input_names = encoder_data_input_fields + decoder_data_input_fields[: with fluid.program_guard(train_prog, startup_prog):
-1] + label_data_input_fields with fluid.unique_name.guard():
sum_cost, avg_cost, predict, token_num, pyreader = transformer(
start_time = time.time() ModelHyperParams.src_vocab_size,
exec_time = 0.0 ModelHyperParams.trg_vocab_size,
for batch_id, data in enumerate(train_data()): ModelHyperParams.max_length + 1,
if batch_id >= num_iters: ModelHyperParams.n_layer,
break ModelHyperParams.n_head,
feed_list = [] ModelHyperParams.d_key,
total_num_token = 0 ModelHyperParams.d_value,
for place_id, data_buffer in enumerate( ModelHyperParams.d_model,
split_data( ModelHyperParams.d_inner_hid,
data, num_part=dev_count)): ModelHyperParams.prepostprocess_dropout,
data_input_dict, num_token = prepare_batch_input( ModelHyperParams.attention_dropout,
data_buffer, data_input_names, ModelHyperParams.eos_idx, ModelHyperParams.relu_dropout,
ModelHyperParams.eos_idx, ModelHyperParams.n_head, ModelHyperParams.preprocess_cmd,
ModelHyperParams.d_model) ModelHyperParams.postprocess_cmd,
total_num_token += num_token ModelHyperParams.weight_sharing,
feed_kv_pairs = data_input_dict.items() TrainTaskConfig.label_smooth_eps,
lr_rate = lr_scheduler.update_learning_rate() use_py_reader=args.use_py_reader,
feed_kv_pairs += {lr_scheduler.learning_rate.name: lr_rate}.items() is_test=False)
feed_list.append(dict(feed_kv_pairs)) lr_decay = fluid.layers.learning_rate_scheduler.noam_decay(
ModelHyperParams.d_model, TrainTaskConfig.warmup_steps)
if not init: optimizer = fluid.optimizer.Adam(
for pos_enc_param_name in pos_enc_param_names: learning_rate=lr_decay * TrainTaskConfig.learning_rate,
pos_enc = position_encoding_init( beta1=TrainTaskConfig.beta1,
ModelHyperParams.max_length + 1, beta2=TrainTaskConfig.beta2,
ModelHyperParams.d_model) epsilon=TrainTaskConfig.eps)
feed_list[place_id][pos_enc_param_name] = pos_enc optimizer.minimize(avg_cost)
for feed_dict in feed_list:
feed_dict[sum_cost.name + "@GRAD"] = 1. / total_num_token if args.use_mem_opt:
fluid.memory_optimize(train_prog)
exe_start_time = time.time()
if dev_count > 1: if TrainTaskConfig.use_gpu:
# prallel executor
outs = exe.run(fetch_list=[sum_cost.name, token_num.name],
feed=feed_list)
else:
# executor
outs = exe.run(fetch_list=[sum_cost, token_num], feed=feed_list[0])
exec_time += time.time() - exe_start_time
sum_cost_val, token_num_val = np.array(outs[0]), np.array(outs[1])
total_sum_cost = sum_cost_val.sum() # sum the cost from multi-devices
total_token_num = token_num_val.sum()
total_avg_cost = total_sum_cost / total_token_num
print("batch: %d, sum loss: %f, avg loss: %f, ppl: %f" %
(batch_id, total_sum_cost, total_avg_cost,
np.exp([min(total_avg_cost, 100)])))
init = True
return time.time() - start_time, exec_time
def profile(args):
print args
if args.device == 'CPU':
TrainTaskConfig.use_gpu = False
if not TrainTaskConfig.use_gpu:
place = fluid.CPUPlace()
dev_count = multiprocessing.cpu_count()
else:
place = fluid.CUDAPlace(0) place = fluid.CUDAPlace(0)
dev_count = fluid.core.get_cuda_device_count() dev_count = fluid.core.get_cuda_device_count()
else:
place = fluid.CPUPlace()
dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
exe = fluid.Executor(place) exe = fluid.Executor(place)
sum_cost, avg_cost, predict, token_num = transformer(
ModelHyperParams.src_vocab_size, ModelHyperParams.trg_vocab_size,
ModelHyperParams.max_length + 1, ModelHyperParams.n_layer,
ModelHyperParams.n_head, ModelHyperParams.d_key,
ModelHyperParams.d_value, ModelHyperParams.d_model,
ModelHyperParams.d_inner_hid, ModelHyperParams.dropout,
ModelHyperParams.weight_sharing, TrainTaskConfig.label_smooth_eps)
lr_scheduler = LearningRateScheduler(ModelHyperParams.d_model,
TrainTaskConfig.warmup_steps,
TrainTaskConfig.learning_rate)
optimizer = fluid.optimizer.Adam(
learning_rate=lr_scheduler.learning_rate,
beta1=TrainTaskConfig.beta1,
beta2=TrainTaskConfig.beta2,
epsilon=TrainTaskConfig.eps)
optimizer.minimize(sum_cost)
# Initialize the parameters. # Initialize the parameters.
if TrainTaskConfig.ckpt_path: if TrainTaskConfig.ckpt_path:
fluid.io.load_persistables(exe, TrainTaskConfig.ckpt_path) fluid.io.load_persistables(exe, TrainTaskConfig.ckpt_path)
lr_scheduler.current_steps = TrainTaskConfig.start_step
else: else:
exe.run(fluid.framework.default_startup_program()) exe.run(startup_prog)
# Disable all sorts for they will be done in the 1st batch. exec_strategy = fluid.ExecutionStrategy()
train_data = reader.DataReader( # For faster executor
src_vocab_fpath=args.src_vocab_fpath, exec_strategy.use_experimental_executor = True
trg_vocab_fpath=args.trg_vocab_fpath, exec_strategy.num_iteration_per_drop_scope = 5
fpattern=args.train_file_pattern, build_strategy = fluid.BuildStrategy()
use_token_batch=args.use_token_batch, # Since the token number differs among devices, customize gradient scale to
batch_size=args.batch_size * (1 if args.use_token_batch else dev_count), # use token average cost among multi-devices, and the gradient scale is
pool_size=args.pool_size, # `1 / token_number` for average cost.
sort_type='none', build_strategy.gradient_scale_strategy = fluid.BuildStrategy.GradientScaleStrategy.Customized
shuffle=False, train_exe = fluid.ParallelExecutor(
shuffle_batch=False, use_cuda=TrainTaskConfig.use_gpu,
start_mark=args.special_token[0], loss_name=avg_cost.name,
end_mark=args.special_token[1], main_program=train_prog,
unk_mark=args.special_token[2], build_strategy=build_strategy,
# count start and end tokens out exec_strategy=exec_strategy)
max_length=ModelHyperParams.max_length - 2,
clip_last_batch=False) # the best cross-entropy value with label smoothing
train_data = read_multiple( loss_normalizer = -((1. - TrainTaskConfig.label_smooth_eps) * np.log(
reader=train_data.batch_generator, (1. - TrainTaskConfig.label_smooth_eps
count=dev_count if args.use_token_batch else 1) )) + TrainTaskConfig.label_smooth_eps *
np.log(TrainTaskConfig.label_smooth_eps / (
if dev_count > 1: ModelHyperParams.trg_vocab_size - 1) + 1e-20))
build_strategy = fluid.BuildStrategy()
build_strategy.gradient_scale_strategy = fluid.BuildStrategy.GradientScaleStrategy.Customized train_data = prepare_data_generator(
train_exe = fluid.ParallelExecutor( args, is_test=False, count=dev_count, pyreader=pyreader)
use_cuda=TrainTaskConfig.use_gpu, if args.use_py_reader:
loss_name=sum_cost.name, pyreader.start()
main_program=fluid.default_main_program(), data_generator = None
build_strategy=build_strategy)
print("Warming up ...")
train_loop(exe if dev_count == 1 else train_exe,
fluid.default_main_program(), False, 3, train_data, dev_count,
sum_cost, avg_cost, lr_scheduler, token_num, predict)
print("\nProfiling ...")
if dev_count == 1:
with profiler.profiler('All', 'total', '/tmp/profile_file'):
total_time, exec_time = train_loop(
exe,
fluid.default_main_program(), True, args.num_iters, train_data,
dev_count, sum_cost, avg_cost, lr_scheduler, token_num, predict)
else: else:
total_time, exec_time = train_loop( data_generator = train_data()
train_exe,
fluid.default_main_program(), True, args.num_iters, train_data, def run(iter_num):
dev_count, sum_cost, avg_cost, lr_scheduler, token_num, predict) reader_time = []
print("Elapsed time: total %f s, in executor %f s" % run_time = []
(total_time, exec_time))
for step_idx in six.moves.xrange(iter_num):
try:
start_time = time.time()
feed_dict_list = prepare_feed_dict_list(data_generator,
init_flag, dev_count)
end_time = time.time()
reader_time.append(end_time - start_time)
start_time = time.time()
if args.use_parallel_exe:
outs = train_exe.run(
fetch_list=[sum_cost.name, token_num.name],
feed=feed_dict_list)
else:
outs = exe.run(program=train_prog,
fetch_list=[sum_cost.name, token_num.name],
feed=feed_dict_list[0]
if feed_dict_list is not None else None)
end_time = time.time()
run_time.append(end_time - start_time)
sum_cost_val, token_num_val = np.array(outs[0]), np.array(outs[
1])
# sum the cost from multi-devices
total_sum_cost = sum_cost_val.sum()
total_token_num = token_num_val.sum()
total_avg_cost = total_sum_cost / total_token_num
print("step_idx: %d, avg loss: %f, "
"normalized loss: %f, ppl: %f" %
(step_idx, total_avg_cost,
total_avg_cost - loss_normalizer,
np.exp([min(total_avg_cost, 100)])))
except (StopIteration, fluid.core.EOFException):
# The current pass is over.
if args.use_py_reader:
pyreader.reset()
pyreader.start()
break
return reader_time, run_time
# start-up
init_flag = True
run(1)
init_flag = False
# profiling
start = time.time()
# profiling is currently supported on only one device
with profiler.profiler('All', 'total', '/tmp/profile_file'):
reader_time, run_time = run(args.iter_num)
end = time.time()
total_time = end - start
print("Total time: {0}, reader time: {1} s, run time: {2} s".format(
total_time, np.sum(reader_time), np.sum(run_time)))
if __name__ == "__main__": if __name__ == "__main__":
args = parse_args() args = parse_args()
profile(args) main(args)
import glob import glob
import six
import os import os
import tarfile import tarfile
...@@ -12,15 +13,16 @@ class SortType(object): ...@@ -12,15 +13,16 @@ class SortType(object):
class Converter(object): class Converter(object):
def __init__(self, vocab, beg, end, unk, delimiter): def __init__(self, vocab, beg, end, unk, delimiter, add_beg):
self._vocab = vocab self._vocab = vocab
self._beg = beg self._beg = beg
self._end = end self._end = end
self._unk = unk self._unk = unk
self._delimiter = delimiter self._delimiter = delimiter
self._add_beg = add_beg
def __call__(self, sentence): def __call__(self, sentence):
return [self._beg] + [ return ([self._beg] if self._add_beg else []) + [
self._vocab.get(w, self._unk) self._vocab.get(w, self._unk)
for w in sentence.split(self._delimiter) for w in sentence.split(self._delimiter)
] + [self._end] ] + [self._end]
...@@ -215,7 +217,8 @@ class DataReader(object): ...@@ -215,7 +217,8 @@ class DataReader(object):
beg=self._src_vocab[start_mark], beg=self._src_vocab[start_mark],
end=self._src_vocab[end_mark], end=self._src_vocab[end_mark],
unk=self._src_vocab[unk_mark], unk=self._src_vocab[unk_mark],
delimiter=self._token_delimiter) delimiter=self._token_delimiter,
add_beg=False)
] ]
if not self._only_src: if not self._only_src:
converters.append( converters.append(
...@@ -224,7 +227,8 @@ class DataReader(object): ...@@ -224,7 +227,8 @@ class DataReader(object):
beg=self._trg_vocab[start_mark], beg=self._trg_vocab[start_mark],
end=self._trg_vocab[end_mark], end=self._trg_vocab[end_mark],
unk=self._trg_vocab[unk_mark], unk=self._trg_vocab[unk_mark],
delimiter=self._token_delimiter)) delimiter=self._token_delimiter,
add_beg=True))
converters = ComposedConverter(converters) converters = ComposedConverter(converters)
...@@ -259,8 +263,10 @@ class DataReader(object): ...@@ -259,8 +263,10 @@ class DataReader(object):
if not os.path.isfile(fpath): if not os.path.isfile(fpath):
raise IOError("Invalid file: %s" % fpath) raise IOError("Invalid file: %s" % fpath)
with open(fpath, "r") as f: with open(fpath, "rb") as f:
for line in f: for line in f:
if six.PY3:
line = line.decode()
fields = line.strip("\n").split(self._field_delimiter) fields = line.strip("\n").split(self._field_delimiter)
if (not self._only_src and len(fields) == 2) or ( if (not self._only_src and len(fields) == 2) or (
self._only_src and len(fields) == 1): self._only_src and len(fields) == 1):
...@@ -269,8 +275,10 @@ class DataReader(object): ...@@ -269,8 +275,10 @@ class DataReader(object):
@staticmethod @staticmethod
def load_dict(dict_path, reverse=False): def load_dict(dict_path, reverse=False):
word_dict = {} word_dict = {}
with open(dict_path, "r") as fdict: with open(dict_path, "rb") as fdict:
for idx, line in enumerate(fdict): for idx, line in enumerate(fdict):
if six.PY3:
line = line.decode()
if reverse: if reverse:
word_dict[idx] = line.strip("\n") word_dict[idx] = line.strip("\n")
else: else:
...@@ -280,8 +288,7 @@ class DataReader(object): ...@@ -280,8 +288,7 @@ class DataReader(object):
def batch_generator(self): def batch_generator(self):
# global sort or global shuffle # global sort or global shuffle
if self._sort_type == SortType.GLOBAL: if self._sort_type == SortType.GLOBAL:
infos = sorted( infos = sorted(self._sample_infos, key=lambda x: x.max_len)
self._sample_infos, key=lambda x: x.max_len, reverse=True)
else: else:
if self._shuffle: if self._shuffle:
infos = self._sample_infos infos = self._sample_infos
......
...@@ -20,3 +20,4 @@ data/pascalvoc/trainval.txt ...@@ -20,3 +20,4 @@ data/pascalvoc/trainval.txt
log* log*
*.log *.log
ssd_mobilenet_v1_pascalvoc*
...@@ -38,7 +38,8 @@ train_parameters = { ...@@ -38,7 +38,8 @@ train_parameters = {
"batch_size": 64, "batch_size": 64,
"lr": 0.001, "lr": 0.001,
"lr_epochs": [40, 60, 80, 100], "lr_epochs": [40, 60, 80, 100],
"lr_decay": [1, 0.5, 0.25, 0.1, 0.01] "lr_decay": [1, 0.5, 0.25, 0.1, 0.01],
"ap_version": '11point',
}, },
"coco2014": { "coco2014": {
"train_images": 82783, "train_images": 82783,
...@@ -47,7 +48,8 @@ train_parameters = { ...@@ -47,7 +48,8 @@ train_parameters = {
"batch_size": 64, "batch_size": 64,
"lr": 0.001, "lr": 0.001,
"lr_epochs": [12, 19], "lr_epochs": [12, 19],
"lr_decay": [1, 0.5, 0.25] "lr_decay": [1, 0.5, 0.25],
"ap_version": 'integral', # should use eval_coco_map.py to test model
}, },
"coco2017": { "coco2017": {
"train_images": 118287, "train_images": 118287,
...@@ -56,7 +58,8 @@ train_parameters = { ...@@ -56,7 +58,8 @@ train_parameters = {
"batch_size": 64, "batch_size": 64,
"lr": 0.001, "lr": 0.001,
"lr_epochs": [12, 19], "lr_epochs": [12, 19],
"lr_decay": [1, 0.5, 0.25] "lr_decay": [1, 0.5, 0.25],
"ap_version": 'integral', # should use eval_coco_map.py to test model
} }
} }
...@@ -77,6 +80,7 @@ def optimizer_setting(train_params): ...@@ -77,6 +80,7 @@ def optimizer_setting(train_params):
def build_program(main_prog, startup_prog, train_params, is_train): def build_program(main_prog, startup_prog, train_params, is_train):
image_shape = train_params['image_shape'] image_shape = train_params['image_shape']
class_num = train_params['class_num'] class_num = train_params['class_num']
ap_version = train_params['ap_version']
with fluid.program_guard(main_prog, startup_prog): with fluid.program_guard(main_prog, startup_prog):
py_reader = fluid.layers.py_reader( py_reader = fluid.layers.py_reader(
capacity=64, capacity=64,
...@@ -97,16 +101,15 @@ def build_program(main_prog, startup_prog, train_params, is_train): ...@@ -97,16 +101,15 @@ def build_program(main_prog, startup_prog, train_params, is_train):
nmsed_out = fluid.layers.detection_output( nmsed_out = fluid.layers.detection_output(
locs, confs, box, box_var, nms_threshold=0.45) locs, confs, box, box_var, nms_threshold=0.45)
with fluid.program_guard(main_prog): loss = fluid.evaluator.DetectionMAP(
loss = fluid.evaluator.DetectionMAP( nmsed_out,
nmsed_out, gt_label,
gt_label, gt_box,
gt_box, difficult,
difficult, class_num,
class_num, overlap_threshold=0.5,
overlap_threshold=0.5, evaluate_difficult=False,
evaluate_difficult=False, ap_version=ap_version)
ap_version=args.ap_version)
return py_reader, loss return py_reader, loss
...@@ -126,7 +129,7 @@ def train(args, ...@@ -126,7 +129,7 @@ def train(args,
devices = os.getenv("CUDA_VISIBLE_DEVICES") or "" devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
devices_num = len(devices.split(",")) devices_num = len(devices.split(","))
batch_size = train_params['batch_size'] batch_size = train_params['batch_size']
epoc_num = train_params['epoch_num'] epoc_num = train_params['epoc_num']
batch_size_per_device = batch_size // devices_num batch_size_per_device = batch_size // devices_num
iters_per_epoc = train_params["train_images"] // batch_size iters_per_epoc = train_params["train_images"] // batch_size
num_workers = 8 num_workers = 8
...@@ -230,7 +233,7 @@ def train(args, ...@@ -230,7 +233,7 @@ def train(args,
loss_v = np.mean(np.array(loss_v)) loss_v = np.mean(np.array(loss_v))
every_epoc_loss.append(loss_v) every_epoc_loss.append(loss_v)
if batch_id % 20 == 0: if batch_id % 20 == 0:
print("Epoc {0}, batch {1}, loss {2}, time {3}".format( print("Epoc {:d}, batch {:d}, loss {:.6f}, time {:.5f}".format(
epoc_id, batch_id, loss_v, start_time - prev_start_time)) epoc_id, batch_id, loss_v, start_time - prev_start_time))
end_time = time.time() end_time = time.time()
total_time += end_time - start_time total_time += end_time - start_time
......
...@@ -2,6 +2,7 @@ from __future__ import absolute_import ...@@ -2,6 +2,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import paddle.fluid as fluid import paddle.fluid as fluid
import six
decoder_size = 128 decoder_size = 128
word_vector_dim = 128 word_vector_dim = 128
...@@ -22,7 +23,7 @@ def conv_bn_pool(input, ...@@ -22,7 +23,7 @@ def conv_bn_pool(input,
pool=True, pool=True,
use_cudnn=True): use_cudnn=True):
tmp = input tmp = input
for i in xrange(group): for i in six.moves.xrange(group):
filter_size = 3 filter_size = 3
conv_std = (2.0 / (filter_size**2 * tmp.shape[1]))**0.5 conv_std = (2.0 / (filter_size**2 * tmp.shape[1]))**0.5
conv_param = fluid.ParamAttr( conv_param = fluid.ParamAttr(
......
import paddle.v2 as paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from utility import add_arguments, print_arguments, to_lodtensor, get_ctc_feeder_data, get_attention_feeder_data from utility import add_arguments, print_arguments, to_lodtensor, get_ctc_feeder_data, get_attention_feeder_data
from attention_model import attention_eval from attention_model import attention_eval
......
from __future__ import print_function from __future__ import print_function
import paddle.v2 as paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from utility import add_arguments, print_arguments, to_lodtensor, get_ctc_feeder_data, get_attention_feeder_for_infer from utility import add_arguments, print_arguments, to_lodtensor, get_ctc_feeder_data, get_attention_feeder_for_infer
import paddle.fluid.profiler as profiler import paddle.fluid.profiler as profiler
......
import numpy as np import numpy as np
import paddle.v2 as paddle
import paddle.fluid as fluid import paddle.fluid as fluid
# reproducible # reproducible
np.random.seed(1) np.random.seed(1)
......
...@@ -111,7 +111,6 @@ According to the configuration of evaluation, the output log is like: ...@@ -111,7 +111,6 @@ According to the configuration of evaluation, the output log is like:
Inference is used to get prediction score or video features based on trained models. Inference is used to get prediction score or video features based on trained models.
``` ```
python infer.py \ python infer.py \
--batch_size=128 \
--class_dim=101 \ --class_dim=101 \
--image_shape=3,224,224 \ --image_shape=3,224,224 \
--with_mem_opt=True \ --with_mem_opt=True \
......
...@@ -9,27 +9,33 @@ for line in f.readlines(): ...@@ -9,27 +9,33 @@ for line in f.readlines():
dd[name.lower()] = int(label) - 1 dd[name.lower()] = int(label) - 1
f.close() f.close()
# generate pkl
path = 'train/' def generate_pkl(mode):
savepath = 'train_pkl/' # generate pkl
if not os.path.exists(savepath): path = '%s/' % mode
os.makedirs(savepath) savepath = '%s_pkl/' % mode
if not os.path.exists(savepath):
fw = open('train.list', 'w') os.makedirs(savepath)
for folder in os.listdir(path):
vidid = folder.split('_', 1)[1] fw = open('%s.list' % mode, 'w')
this_label = dd[folder.split('_')[1].lower()] for folder in os.listdir(path):
this_feat = [] vidid = folder.split('_', 1)[1]
for img in sorted(os.listdir(path + folder)): this_label = dd[folder.split('_')[1].lower()]
fout = open(path + folder + '/' + img, 'rb') this_feat = []
this_feat.append(fout.read()) for img in sorted(os.listdir(path + folder)):
fout.close() fout = open(path + folder + '/' + img, 'rb')
this_feat.append(fout.read())
res = [vidid, this_label, this_feat] fout.close()
outp = open(savepath + vidid + '.pkl', 'wb') res = [vidid, this_label, this_feat]
cPickle.dump(res, outp, protocol=cPickle.HIGHEST_PROTOCOL)
outp.close() outp = open(savepath + vidid + '.pkl', 'wb')
cPickle.dump(res, outp, protocol=cPickle.HIGHEST_PROTOCOL)
fw.write('data/train_pkl/%s.pkl\n' % vidid) outp.close()
fw.close()
fw.write('data/%s/%s.pkl\n' % (savepath, vidid))
fw.close()
generate_pkl('train')
generate_pkl('test')
...@@ -2,7 +2,7 @@ import os ...@@ -2,7 +2,7 @@ import os
import numpy as np import numpy as np
import time import time
import sys import sys
import paddle.v2 as paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from resnet import TSN_ResNet from resnet import TSN_ResNet
import reader import reader
......
...@@ -2,7 +2,7 @@ import os ...@@ -2,7 +2,7 @@ import os
import numpy as np import numpy as np
import time import time
import sys import sys
import paddle.v2 as paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from resnet import TSN_ResNet from resnet import TSN_ResNet
import reader import reader
......
...@@ -5,7 +5,7 @@ import functools ...@@ -5,7 +5,7 @@ import functools
import cPickle import cPickle
from cStringIO import StringIO from cStringIO import StringIO
import numpy as np import numpy as np
import paddle.v2 as paddle import paddle
from PIL import Image, ImageEnhance from PIL import Image, ImageEnhance
random.seed(0) random.seed(0)
...@@ -16,8 +16,8 @@ THREAD = 8 ...@@ -16,8 +16,8 @@ THREAD = 8
BUF_SIZE = 1024 BUF_SIZE = 1024
TRAIN_LIST = 'data/train.list' TRAIN_LIST = 'data/train.list'
TEST_LIST = 'data/val.list' TEST_LIST = 'data/test.list'
INFER_LIST = 'data/val.list' INFER_LIST = 'data/test.list'
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
......
...@@ -2,6 +2,7 @@ import os ...@@ -2,6 +2,7 @@ import os
import numpy as np import numpy as np
import time import time
import sys import sys
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from resnet import TSN_ResNet from resnet import TSN_ResNet
import reader import reader
......
...@@ -12,23 +12,23 @@ The word embedding expresses words with a real vector. Each dimension of the vec ...@@ -12,23 +12,23 @@ The word embedding expresses words with a real vector. Each dimension of the vec
In the example of word vectors, we show how to use Hierarchical-Sigmoid and Noise Contrastive Estimation (NCE) to accelerate word-vector learning. In the example of word vectors, we show how to use Hierarchical-Sigmoid and Noise Contrastive Estimation (NCE) to accelerate word-vector learning.
- 1.1 [Hsigmoid Accelerated Word Vector Training](https://github.com/PaddlePaddle/models/tree/develop/v2/hsigmoid) - 1.1 [Hsigmoid Accelerated Word Vector Training](https://github.com/PaddlePaddle/models/tree/develop/legacy/hsigmoid)
- 1.2 [Noise Contrastive Estimation Accelerated Word Vector Training](https://github.com/PaddlePaddle/models/tree/develop/v2/nce_cost) - 1.2 [Noise Contrastive Estimation Accelerated Word Vector Training](https://github.com/PaddlePaddle/models/tree/develop/legacy/nce_cost)
## 2. RNN language model ## 2. RNN language model
The language model is important in the field of natural language processing. In addition to getting the word vector (a by-product of language model training), it can also help us to generate text. Given a number of words, the language model can help us predict the next most likely word. In the example of using the language model to generate text, we focus on the recurrent neural network language model. By following the instructions in the document, users can quickly adapt the model to their own training corpus and build interesting applications such as automatic poetry or prose writing. The language model is important in the field of natural language processing. In addition to getting the word vector (a by-product of language model training), it can also help us to generate text. Given a number of words, the language model can help us predict the next most likely word. In the example of using the language model to generate text, we focus on the recurrent neural network language model. By following the instructions in the document, users can quickly adapt the model to their own training corpus and build interesting applications such as automatic poetry or prose writing.
- 2.1 [Generate text using the RNN language model](https://github.com/PaddlePaddle/models/tree/develop/v2/generate_sequence_by_rnn_lm) - 2.1 [Generate text using the RNN language model](https://github.com/PaddlePaddle/models/tree/develop/legacy/generate_sequence_by_rnn_lm)
## 3. Click-Through Rate prediction ## 3. Click-Through Rate prediction
The click-through rate model predicts the probability that a user will click on an ad. This is widely used for advertising technology. In the early stages of the development of click-through rate prediction, Logistic Regression showed good learning performance on large-scale sparse features. In recent years, DNN models, because of their strong learning ability, have gradually taken the lead in the click-through rate prediction task. The click-through rate model predicts the probability that a user will click on an ad. This is widely used for advertising technology. In the early stages of the development of click-through rate prediction, Logistic Regression showed good learning performance on large-scale sparse features. In recent years, DNN models, because of their strong learning ability, have gradually taken the lead in the click-through rate prediction task.
In the example of click-through rate estimates, we first give the Google's Wide & Deep model. This model combines the advantages of DNN and the applicable logistic regression model for DNN and large-scale sparse features. Then we provide the deep factorization machine for click-through rate prediction. The deep factorization machine combines the factorization machine and deep neural networks to model both low order and high order interactions of input features. In the example of click-through rate estimates, we first give the Google's Wide & Deep model. This model combines the advantages of DNN and the applicable logistic regression model for DNN and large-scale sparse features. Then we provide the deep factorization machine for click-through rate prediction. The deep factorization machine combines the factorization machine and deep neural networks to model both low order and high order interactions of input features.
- 3.1 [Click-Through Rate Model](https://github.com/PaddlePaddle/models/tree/develop/v2/ctr) - 3.1 [Click-Through Rate Model](https://github.com/PaddlePaddle/models/tree/develop/legacy/ctr)
- 3.2 [Deep Factorization Machine for Click-Through Rate prediction](https://github.com/PaddlePaddle/models/tree/develop/v2/deep_fm) - 3.2 [Deep Factorization Machine for Click-Through Rate prediction](https://github.com/PaddlePaddle/models/tree/develop/legacy/deep_fm)
## 4. Text classification ## 4. Text classification
...@@ -36,7 +36,7 @@ Text classification is one of the most basic tasks in natural language processin ...@@ -36,7 +36,7 @@ Text classification is one of the most basic tasks in natural language processin
For text classification, we provide a non-sequential text classification model based on DNN and CNN. (For LSTM-based model, please refer to PaddleBook [Sentiment Analysis](http://www.paddlepaddle.org/docs/develop/book/06.understand_sentiment/index.html)). For text classification, we provide a non-sequential text classification model based on DNN and CNN. (For LSTM-based model, please refer to PaddleBook [Sentiment Analysis](http://www.paddlepaddle.org/docs/develop/book/06.understand_sentiment/index.html)).
- 4.1 [Sentiment analysis based on DNN / CNN](https://github.com/PaddlePaddle/models/tree/develop/v2/text_classification) - 4.1 [Sentiment analysis based on DNN / CNN](https://github.com/PaddlePaddle/models/tree/develop/legacy/text_classification)
## 5. Learning to rank ## 5. Learning to rank
...@@ -45,14 +45,14 @@ The depth neural network can be used to model the fractional function to form va ...@@ -45,14 +45,14 @@ The depth neural network can be used to model the fractional function to form va
The algorithms for learning to rank are usually categorized into three groups by their input representation and the loss function. These are pointwise, pairwise and listwise approaches. Here we demonstrate RankLoss loss function method (pairwise approach), and LambdaRank loss function method (listwise approach). (For Pointwise approaches, please refer to [Recommended System](http://www.paddlepaddle.org/docs/develop/book/05.recommender_system/index.html)). The algorithms for learning to rank are usually categorized into three groups by their input representation and the loss function. These are pointwise, pairwise and listwise approaches. Here we demonstrate RankLoss loss function method (pairwise approach), and LambdaRank loss function method (listwise approach). (For Pointwise approaches, please refer to [Recommended System](http://www.paddlepaddle.org/docs/develop/book/05.recommender_system/index.html)).
- 5.1 [Learning to rank based on Pairwise and Listwise approaches](https://github.com/PaddlePaddle/models/tree/develop/v2/ltr) - 5.1 [Learning to rank based on Pairwise and Listwise approaches](https://github.com/PaddlePaddle/models/tree/develop/legacy/ltr)
## 6. Semantic model ## 6. Semantic model
The deep structured semantic model uses a DNN model to learn low-dimensional vector representations in a continuous semantic space, and finally models the semantic similarity between two sentences. The deep structured semantic model uses a DNN model to learn low-dimensional vector representations in a continuous semantic space, and finally models the semantic similarity between two sentences.
In this example, we demonstrate how to use PaddlePaddle to implement a generic deep structured semantic model to model the semantic similarity between two strings. The model supports different network structures such as CNN (Convolutional Network), FC (Fully Connected Network), RNN (Recurrent Neural Network), and different loss functions such as classification, regression, and sequencing. In this example, we demonstrate how to use PaddlePaddle to implement a generic deep structured semantic model to model the semantic similarity between two strings. The model supports different network structures such as CNN (Convolutional Network), FC (Fully Connected Network), RNN (Recurrent Neural Network), and different loss functions such as classification, regression, and sequencing.
- 6.1 [Deep structured semantic model](https://github.com/PaddlePaddle/models/tree/develop/v2/dssm) - 6.1 [Deep structured semantic model](https://github.com/PaddlePaddle/models/tree/develop/legacy/dssm)
## 7. Sequence tagging ## 7. Sequence tagging
...@@ -60,7 +60,7 @@ Given the input sequence, the sequence tagging model is one of the most basic ta ...@@ -60,7 +60,7 @@ Given the input sequence, the sequence tagging model is one of the most basic ta
In the example of the sequence tagging, we describe how to train an end-to-end sequence tagging model with the Named Entity Recognition (NER) task as an example. In the example of the sequence tagging, we describe how to train an end-to-end sequence tagging model with the Named Entity Recognition (NER) task as an example.
- 7.1 [Named Entity Recognition](https://github.com/PaddlePaddle/models/tree/develop/v2/sequence_tagging_for_ner) - 7.1 [Named Entity Recognition](https://github.com/PaddlePaddle/models/tree/develop/legacy/sequence_tagging_for_ner)
## 8. Sequence to sequence learning ## 8. Sequence to sequence learning
...@@ -68,19 +68,19 @@ Sequence-to-sequence model has a wide range of applications. This includes machi ...@@ -68,19 +68,19 @@ Sequence-to-sequence model has a wide range of applications. This includes machi
As an example for sequence-to-sequence learning, we take the machine translation task. We demonstrate the sequence-to-sequence mapping model without attention mechanism, which is the basis for all sequence-to-sequence learning models. We will use scheduled sampling to improve the problem of error accumulation in the RNN model, and machine translation with external memory mechanism. As an example for sequence-to-sequence learning, we take the machine translation task. We demonstrate the sequence-to-sequence mapping model without attention mechanism, which is the basis for all sequence-to-sequence learning models. We will use scheduled sampling to improve the problem of error accumulation in the RNN model, and machine translation with external memory mechanism.
- 8.1 [Basic Sequence-to-sequence model](https://github.com/PaddlePaddle/models/tree/develop/v2/nmt_without_attention) - 8.1 [Basic Sequence-to-sequence model](https://github.com/PaddlePaddle/models/tree/develop/legacy/nmt_without_attention)
## 9. Image classification ## 9. Image classification
For the example of image classification, we show you how to train AlexNet, VGG, GoogLeNet, ResNet, Inception-v4, Inception-Resnet-V2 and Xception models in PaddlePaddle. It also provides model conversion tools that convert Caffe or TensorFlow trained model files into PaddlePaddle model files. For the example of image classification, we show you how to train AlexNet, VGG, GoogLeNet, ResNet, Inception-v4, Inception-Resnet-V2 and Xception models in PaddlePaddle. It also provides model conversion tools that convert Caffe or TensorFlow trained model files into PaddlePaddle model files.
- 9.1 [convert Caffe model file to PaddlePaddle model file](https://github.com/PaddlePaddle/models/tree/develop/v2/image_classification/caffe2paddle) - 9.1 [convert Caffe model file to PaddlePaddle model file](https://github.com/PaddlePaddle/models/tree/develop/legacy/image_classification/caffe2paddle)
- 9.2 [convert TensorFlow model file to PaddlePaddle model file](https://github.com/PaddlePaddle/models/tree/develop/v2/image_classification/tf2paddle) - 9.2 [convert TensorFlow model file to PaddlePaddle model file](https://github.com/PaddlePaddle/models/tree/develop/legacy/image_classification/tf2paddle)
- 9.3 [AlexNet](https://github.com/PaddlePaddle/models/tree/develop/v2/image_classification) - 9.3 [AlexNet](https://github.com/PaddlePaddle/models/tree/develop/legacy/image_classification)
- 9.4 [VGG](https://github.com/PaddlePaddle/models/tree/develop/v2/image_classification) - 9.4 [VGG](https://github.com/PaddlePaddle/models/tree/develop/legacy/image_classification)
- 9.5 [Residual Network](https://github.com/PaddlePaddle/models/tree/develop/v2/image_classification) - 9.5 [Residual Network](https://github.com/PaddlePaddle/models/tree/develop/legacy/image_classification)
- 9.6 [Inception-v4](https://github.com/PaddlePaddle/models/tree/develop/v2/image_classification) - 9.6 [Inception-v4](https://github.com/PaddlePaddle/models/tree/develop/legacy/image_classification)
- 9.7 [Inception-Resnet-V2](https://github.com/PaddlePaddle/models/tree/develop/v2/image_classification) - 9.7 [Inception-Resnet-V2](https://github.com/PaddlePaddle/models/tree/develop/legacy/image_classification)
- 9.8 [Xception](https://github.com/PaddlePaddle/models/tree/develop/v2/image_classification) - 9.8 [Xception](https://github.com/PaddlePaddle/models/tree/develop/legacy/image_classification)
This tutorial is contributed by [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) and licensed under the [Apache-2.0 license](LICENSE). This tutorial is contributed by [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) and licensed under the [Apache-2.0 license](LICENSE).
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册