Unverified commit 1a5d3925, authored by lujun, committed by GitHub

Merge pull request #1 from PaddlePaddle/develop

merge-local
.DS_Store
paddle/operators/check_t.save
paddle/operators/check_tensor.ls
paddle/operators/tensor.save
python/paddle/v2/fluid/tests/book/image_classification_resnet.inference.model/
python/paddle/v2/fluid/tests/book/image_classification_vgg.inference.model/
python/paddle/v2/fluid/tests/book/label_semantic_roles.inference.model/
*.DS_Store
*.vs
build/
build_doc/
*.user
.vscode
.idea
.project
.cproject
.pydevproject
.settings/
*.pyc
.*~
CMakeSettings.json
Makefile
.test_env/
third_party/
fluid/neural_machine_translation/transformer/deps
fluid/neural_machine_translation/transformer/train.data
fluid/neural_machine_translation/transformer/train.pkl
fluid/neural_machine_translation/transformer/train.sh
fluid/neural_machine_translation/transformer/train.tok.clean.bpe.32000.en-de
fluid/neural_machine_translation/transformer/vocab.bpe.32000.refined
*~
bazel-*
build_*
# clion workspace.
cmake-build-*
model_test
@@ -11,5 +11,16 @@ PaddlePaddle provides a rich set of computational units to enable users to adopt
 - [legacy models](legacy): use PaddlePaddle's v2 APIs.

+PaddlePaddle provides a rich set of computational units so that users can take a modular approach to a wide range of learning problems. In this repo, we show how to use PaddlePaddle to solve common machine learning tasks and provide several easy-to-learn, easy-to-use neural network models.
+- [fluid models](fluid): use the PaddlePaddle Fluid APIs. We especially recommend the Fluid models.
+- [legacy models](legacy): use the PaddlePaddle v2 APIs.
+
 ## License

 This tutorial is contributed by [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) and licensed under the [Apache-2.0 license](LICENSE).
@@ -25,16 +25,6 @@ def to_lodtensor(data, place):
     return res

-def lodtensor_to_ndarray(lod_tensor):
-    """convert lodtensor to ndarray
-    """
-    dims = lod_tensor._get_dims()
-    ret = np.zeros(shape=dims).astype('float32')
-    for i in xrange(np.product(dims)):
-        ret.ravel()[i] = lod_tensor.get_float_element(i)
-    return ret, lod_tensor.lod()
-
 def split_infer_result(infer_seq, lod):
     infer_batch = []
     for i in xrange(0, len(lod[0]) - 1):
......
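The removed helper copied a LoDTensor out element by element. The call sites updated later in this change read fetched values with `np.array(tensor)` directly (see the rewritten train loop below), which is simpler and faster. A minimal sketch of the replacement idiom, assuming a Fluid build whose LoDTensor supports direct numpy conversion:

```python
import numpy as np
import paddle.fluid as fluid

t = fluid.LoDTensor()
t.set(np.arange(6, dtype='float32').reshape(2, 3), fluid.CPUPlace())

arr = np.array(t)   # replaces lodtensor_to_ndarray(t)[0]
lod = t.lod()       # the LoD info is still available separately
print(arr.shape, lod)
```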
ThreadPool
build
post_latgen_faster_mapped.so
pybind11
aux.tar.gz
aux
data
checkpoints
export CUDA_VISIBLE_DEVICES=4,5,6,7
python -u ../../train.py --train_feature_lst data/train_feature.lst \
                         --train_label_lst data/train_label.lst \
                         --val_feature_lst data/val_feature.lst \
                         --val_label_lst data/val_label.lst \
@@ -7,7 +7,8 @@
                         --checkpoints checkpoints \
                         --frame_dim 80 \
                         --class_num 3040 \
+                        --print_per_batches 100 \
                         --infer_models '' \
-                        --batch_size 64 \
+                        --batch_size 16 \
                         --learning_rate 6.4e-5 \
                         --parallel
@@ -5,19 +5,21 @@ from __future__ import print_function

 import paddle.fluid as fluid

-def stacked_lstmp_model(frame_dim,
+def stacked_lstmp_model(feature,
+                        label,
                         hidden_dim,
                         proj_dim,
                         stacked_num,
                         class_num,
                         parallel=False,
                         is_train=True):
-    """ The model for DeepASR. The main structure is composed of stacked
-        identical LSTMP (LSTM with recurrent projection) layers.
+    """
+    The model for DeepASR. The main structure is composed of stacked
+    identical LSTMP (LSTM with recurrent projection) layers.

-        When running in training and validation phase, the feeding dictionary
-        is {'feature', 'label'}, fed by the LodTensor for feature data and
-        label data respectively. And in inference, only `feature` is needed.
+    When running in training and validation phase, the feeding dictionary
+    is {'feature', 'label'}, fed by the LodTensor for feature data and
+    label data respectively. And in inference, only `feature` is needed.

     Args:
         frame_dim(int): The frame dimension of feature data.
@@ -28,80 +30,45 @@ def stacked_lstmp_model(frame_dim,
         is_train(bool): Run in training phase or not, default `True`.
         class_dim(int): The number of output classes.
     """
+    conv1 = fluid.layers.conv2d(
+        input=feature,
+        num_filters=32,
+        filter_size=3,
+        stride=1,
+        padding=1,
+        bias_attr=True,
+        act="relu")

-    # network configuration
-    def _net_conf(feature, label):
-        conv1 = fluid.layers.conv2d(
-            input=feature,
-            num_filters=32,
-            filter_size=3,
-            stride=1,
-            padding=1,
-            bias_attr=True,
-            act="relu")
+    pool1 = fluid.layers.pool2d(
+        conv1, pool_size=3, pool_type="max", pool_stride=2, pool_padding=0)

-        pool1 = fluid.layers.pool2d(
-            conv1, pool_size=3, pool_type="max", pool_stride=2, pool_padding=0)
-
-        stack_input = pool1
-        for i in range(stacked_num):
-            fc = fluid.layers.fc(input=stack_input,
-                                 size=hidden_dim * 4,
-                                 bias_attr=None)
-            proj, cell = fluid.layers.dynamic_lstmp(
-                input=fc,
-                size=hidden_dim * 4,
-                proj_size=proj_dim,
-                bias_attr=True,
-                use_peepholes=True,
-                is_reverse=False,
-                cell_activation="tanh",
-                proj_activation="tanh")
-            bn = fluid.layers.batch_norm(
-                input=proj,
-                is_test=not is_train,
-                momentum=0.9,
-                epsilon=1e-05,
-                data_layout='NCHW')
-            stack_input = bn
-
-        prediction = fluid.layers.fc(input=stack_input,
-                                     size=class_num,
-                                     act='softmax')
-
-        cost = fluid.layers.cross_entropy(input=prediction, label=label)
-        avg_cost = fluid.layers.mean(x=cost)
-        acc = fluid.layers.accuracy(input=prediction, label=label)
-        return prediction, avg_cost, acc
-
-    # data feeder
-    feature = fluid.layers.data(
-        name="feature",
-        shape=[-1, 3, 11, frame_dim],
-        dtype="float32",
-        lod_level=1)
-    label = fluid.layers.data(
-        name="label", shape=[-1, 1], dtype="int64", lod_level=1)
-
-    if parallel:
-        # When the execution place is specified to CUDAPlace, the program will
-        # run on all $CUDA_VISIBLE_DEVICES GPUs. Otherwise the program will
-        # run on all CPU devices.
-        places = fluid.layers.device.get_places()
-        pd = fluid.layers.ParallelDo(places)
-        with pd.do():
-            feat_ = pd.read_input(feature)
-            label_ = pd.read_input(label)
-            prediction, avg_cost, acc = _net_conf(feat_, label_)
-            for out in [prediction, avg_cost, acc]:
-                pd.write_output(out)
-
-        # get mean loss and acc through every devices.
-        prediction, avg_cost, acc = pd()
-        prediction.stop_gradient = True
-        avg_cost = fluid.layers.mean(x=avg_cost)
-        acc = fluid.layers.mean(x=acc)
-    else:
-        prediction, avg_cost, acc = _net_conf(feature, label)
+    stack_input = pool1
+    for i in range(stacked_num):
+        fc = fluid.layers.fc(input=stack_input,
+                             size=hidden_dim * 4,
+                             bias_attr=None)
+        proj, cell = fluid.layers.dynamic_lstmp(
+            input=fc,
+            size=hidden_dim * 4,
+            proj_size=proj_dim,
+            bias_attr=True,
+            use_peepholes=True,
+            is_reverse=False,
+            cell_activation="tanh",
+            proj_activation="tanh")
+        bn = fluid.layers.batch_norm(
+            input=proj,
+            is_test=not is_train,
+            momentum=0.9,
+            epsilon=1e-05,
+            data_layout='NCHW')
+        stack_input = bn
+
+    prediction = fluid.layers.fc(input=stack_input,
+                                 size=class_num,
+                                 act='softmax')
+
+    cost = fluid.layers.cross_entropy(input=prediction, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+    acc = fluid.layers.accuracy(input=prediction, label=label)

     return prediction, avg_cost, acc
@@ -14,7 +14,6 @@ import data_utils.augmentor.trans_add_delta as trans_add_delta
 import data_utils.augmentor.trans_splice as trans_splice
 import data_utils.augmentor.trans_delay as trans_delay
 import data_utils.async_data_reader as reader
-from data_utils.util import lodtensor_to_ndarray
 from model_utils.model import stacked_lstmp_model
@@ -24,7 +23,8 @@ def parse_args():
         '--batch_size',
         type=int,
         default=32,
-        help='The sequence number of a batch data. (default: %(default)d)')
+        help='The sequence number of a batch data. Batch size per GPU. (default: %(default)d)'
+    )
     parser.add_argument(
         '--minimum_batch_size',
         type=int,
@@ -147,29 +147,72 @@ def train(args):
     if args.infer_models != '' and not os.path.exists(args.infer_models):
         os.mkdir(args.infer_models)

-    prediction, avg_cost, accuracy = stacked_lstmp_model(
-        frame_dim=args.frame_dim,
-        hidden_dim=args.hidden_dim,
-        proj_dim=args.proj_dim,
-        stacked_num=args.stacked_num,
-        class_num=args.class_num,
-        parallel=args.parallel)
-
-    # program for test
-    test_program = fluid.default_main_program().clone()
-
-    #optimizer = fluid.optimizer.Momentum(learning_rate=args.learning_rate, momentum=0.9)
-    optimizer = fluid.optimizer.Adam(
-        learning_rate=fluid.layers.exponential_decay(
-            learning_rate=args.learning_rate,
-            decay_steps=1879,
-            decay_rate=1 / 1.2,
-            staircase=True))
-    optimizer.minimize(avg_cost)
+    train_program = fluid.Program()
+    train_startup = fluid.Program()
+
+    with fluid.program_guard(train_program, train_startup):
+        with fluid.unique_name.guard():
+            py_train_reader = fluid.layers.py_reader(
+                capacity=10,
+                shapes=([-1, 3, 11, args.frame_dim], [-1, 1]),
+                dtypes=['float32', 'int64'],
+                lod_levels=[1, 1],
+                name='train_reader')
+            feature, label = fluid.layers.read_file(py_train_reader)
+            prediction, avg_cost, accuracy = stacked_lstmp_model(
+                feature=feature,
+                label=label,
+                hidden_dim=args.hidden_dim,
+                proj_dim=args.proj_dim,
+                stacked_num=args.stacked_num,
+                class_num=args.class_num)
+            # optimizer = fluid.optimizer.Momentum(learning_rate=args.learning_rate, momentum=0.9)
+            optimizer = fluid.optimizer.Adam(
+                learning_rate=fluid.layers.exponential_decay(
+                    learning_rate=args.learning_rate,
+                    decay_steps=1879,
+                    decay_rate=1 / 1.2,
+                    staircase=True))
+            optimizer.minimize(avg_cost)
+            fluid.memory_optimize(train_program)
+
+    test_program = fluid.Program()
+    test_startup = fluid.Program()
+    with fluid.program_guard(test_program, test_startup):
+        with fluid.unique_name.guard():
+            py_test_reader = fluid.layers.py_reader(
+                capacity=10,
+                shapes=([-1, 3, 11, args.frame_dim], [-1, 1]),
+                dtypes=['float32', 'int64'],
+                lod_levels=[1, 1],
+                name='test_reader')
+            feature, label = fluid.layers.read_file(py_test_reader)
+            prediction, avg_cost, accuracy = stacked_lstmp_model(
+                feature=feature,
+                label=label,
+                hidden_dim=args.hidden_dim,
+                proj_dim=args.proj_dim,
+                stacked_num=args.stacked_num,
+                class_num=args.class_num)
+    test_program = test_program.clone(for_test=True)

     place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
     exe = fluid.Executor(place)
-    exe.run(fluid.default_startup_program())
+    exe.run(train_startup)
+    exe.run(test_startup)
+
+    if args.parallel:
+        exec_strategy = fluid.ExecutionStrategy()
+        exec_strategy.num_iteration_per_drop_scope = 10
+        train_exe = fluid.ParallelExecutor(
+            use_cuda=(args.device == 'GPU'),
+            loss_name=avg_cost.name,
+            exec_strategy=exec_strategy,
+            main_program=train_program)
+        test_exe = fluid.ParallelExecutor(
+            use_cuda=(args.device == 'GPU'),
+            main_program=test_program,
+            exec_strategy=exec_strategy,
+            share_vars_from=train_exe)

     # resume training if initial model provided.
     if args.init_model_path is not None:
@@ -181,15 +224,24 @@ def train(args):
         trans_splice.TransSplice(5, 5), trans_delay.TransDelay(5)
     ]

-    feature_t = fluid.LoDTensor()
-    label_t = fluid.LoDTensor()
+    # bind train_reader
+    train_data_reader = reader.AsyncDataReader(
+        args.train_feature_lst,
+        args.train_label_lst,
+        -1,
+        split_sentence_threshold=1024)
+    train_data_reader.set_transformers(ltrans)

-    # validation
-    def test(exe):
-        # If test data not found, return invalid cost and accuracy
-        if not (os.path.exists(args.val_feature_lst) and
-                os.path.exists(args.val_label_lst)):
-            return -1.0, -1.0
+    def train_data_provider():
+        for data in train_data_reader.batch_iterator(args.batch_size,
+                                                     args.minimum_batch_size):
+            yield batch_data_to_lod_tensors(args, data, fluid.CPUPlace())
+
+    py_train_reader.decorate_tensor_provider(train_data_provider)
+
+    if (os.path.exists(args.val_feature_lst) and
+            os.path.exists(args.val_label_lst)):
         # test data reader
         test_data_reader = reader.AsyncDataReader(
             args.val_feature_lst,
@@ -197,86 +249,101 @@ def train(args):
             -1,
             split_sentence_threshold=1024)
         test_data_reader.set_transformers(ltrans)
-        test_costs, test_accs = [], []
-        for batch_id, batch_data in enumerate(
-                test_data_reader.batch_iterator(args.batch_size,
-                                                args.minimum_batch_size)):
-            # load_data
-            (features, labels, lod, _) = batch_data
-            features = np.reshape(features, (-1, 11, 3, args.frame_dim))
-            features = np.transpose(features, (0, 2, 1, 3))
-            feature_t.set(features, place)
-            feature_t.set_lod([lod])
-            label_t.set(labels, place)
-            label_t.set_lod([lod])
-
-            cost, acc = exe.run(test_program,
-                                feed={"feature": feature_t,
-                                      "label": label_t},
-                                fetch_list=[avg_cost, accuracy],
-                                return_numpy=False)
-            test_costs.append(lodtensor_to_ndarray(cost)[0])
-            test_accs.append(lodtensor_to_ndarray(acc)[0])
-        return np.mean(test_costs), np.mean(test_accs)

-    # train data reader
-    train_data_reader = reader.AsyncDataReader(
-        args.train_feature_lst,
-        args.train_label_lst,
-        -1,
-        split_sentence_threshold=1024)
+        def test_data_provider():
+            for data in test_data_reader.batch_iterator(
+                    args.batch_size, args.minimum_batch_size):
+                yield batch_data_to_lod_tensors(args, data, fluid.CPUPlace())
+
+        py_test_reader.decorate_tensor_provider(test_data_provider)
+
+    # validation
+    def test(exe):
+        # If test data not found, return invalid cost and accuracy
+        if not (os.path.exists(args.val_feature_lst) and
+                os.path.exists(args.val_label_lst)):
+            return -1.0, -1.0
+        batch_id = 0
+        test_costs = []
+        test_accs = []
+        while True:
+            if batch_id == 0:
+                py_test_reader.start()
+            try:
+                if args.parallel:
+                    cost, acc = exe.run(
+                        fetch_list=[avg_cost.name, accuracy.name],
+                        return_numpy=False)
+                else:
+                    cost, acc = exe.run(program=test_program,
+                                        fetch_list=[avg_cost, accuracy],
+                                        return_numpy=False)
+                sys.stdout.write('.')
+                sys.stdout.flush()
+                test_costs.append(np.array(cost)[0])
+                test_accs.append(np.array(acc)[0])
+                batch_id += 1
+            except fluid.core.EOFException:
+                py_test_reader.reset()
+                break
+        return np.mean(test_costs), np.mean(test_accs)

-    train_data_reader.set_transformers(ltrans)
     # train
     for pass_id in xrange(args.pass_num):
         pass_start_time = time.time()
-        for batch_id, batch_data in enumerate(
-                train_data_reader.batch_iterator(args.batch_size,
-                                                 args.minimum_batch_size)):
-            # load_data
-            (features, labels, lod, name_lst) = batch_data
-            features = np.reshape(features, (-1, 11, 3, args.frame_dim))
-            features = np.transpose(features, (0, 2, 1, 3))
-            feature_t.set(features, place)
-            feature_t.set_lod([lod])
-            label_t.set(labels, place)
-            label_t.set_lod([lod])
+        batch_id = 0
+        while True:
+            if batch_id == 0:
+                py_train_reader.start()

             to_print = batch_id > 0 and (batch_id % args.print_per_batches == 0)
-            outs = exe.run(fluid.default_main_program(),
-                           feed={"feature": feature_t,
-                                 "label": label_t},
-                           fetch_list=[avg_cost, accuracy] if to_print else [],
-                           return_numpy=False)
+            try:
+                if args.parallel:
+                    outs = train_exe.run(
+                        fetch_list=[avg_cost.name, accuracy.name]
+                        if to_print else [],
+                        return_numpy=False)
+                else:
+                    outs = exe.run(program=train_program,
+                                   fetch_list=[avg_cost, accuracy]
+                                   if to_print else [],
+                                   return_numpy=False)
+            except fluid.core.EOFException:
+                py_train_reader.reset()
+                break

             if to_print:
-                print("\nBatch %d, train cost: %f, train acc: %f" %
-                      (batch_id, lodtensor_to_ndarray(outs[0])[0],
-                       lodtensor_to_ndarray(outs[1])[0]))
+                if args.parallel:
+                    print("\nBatch %d, train cost: %f, train acc: %f" %
+                          (batch_id, np.mean(outs[0]), np.mean(outs[1])))
+                else:
+                    print("\nBatch %d, train cost: %f, train acc: %f" % (
+                        batch_id, np.array(outs[0])[0], np.array(outs[1])[0]))
                 # save the latest checkpoint
                 if args.checkpoints != '':
                     model_path = os.path.join(args.checkpoints,
                                               "deep_asr.latest.checkpoint")
-                    fluid.io.save_persistables(exe, model_path)
+                    fluid.io.save_persistables(exe, model_path, train_program)
             else:
                 sys.stdout.write('.')
                 sys.stdout.flush()
+            batch_id += 1

         # run test
-        val_cost, val_acc = test(exe)
+        val_cost, val_acc = test(test_exe if args.parallel else exe)

         # save checkpoint per pass
         if args.checkpoints != '':
             model_path = os.path.join(
                 args.checkpoints,
                 "deep_asr.pass_" + str(pass_id) + ".checkpoint")
-            fluid.io.save_persistables(exe, model_path)
+            fluid.io.save_persistables(exe, model_path, train_program)
         # save inference model
         if args.infer_models != '':
             model_path = os.path.join(
                 args.infer_models,
                 "deep_asr.pass_" + str(pass_id) + ".infer.model")
             fluid.io.save_inference_model(model_path, ["feature"],
-                                          [prediction], exe)
+                                          [prediction], exe, train_program)
         # cal pass time
         pass_end_time = time.time()
         time_consumed = pass_end_time - pass_start_time
@@ -285,6 +352,19 @@ def train(args):
           (pass_id, time_consumed, val_cost, val_acc))

+def batch_data_to_lod_tensors(args, batch_data, place):
+    features, labels, lod, name_lst = batch_data
+    features = np.reshape(features, (-1, 11, 3, args.frame_dim))
+    features = np.transpose(features, (0, 2, 1, 3))
+    feature_t = fluid.LoDTensor()
+    label_t = fluid.LoDTensor()
+    feature_t.set(features, place)
+    feature_t.set_lod([lod])
+    label_t.set(labels, place)
+    label_t.set_lod([lod])
+    return feature_t, label_t
+
 if __name__ == '__main__':
     args = parse_args()
     print_arguments(args)
......
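The restructured training loop above follows the standard `py_reader` pattern: a Python generator is bound to the reader with `decorate_tensor_provider`, `start()` arms the reader at the top of each pass, and `fluid.core.EOFException` signals that the provider is exhausted, after which `reset()` re-arms it for the next pass. For reference, with `staircase=True` the Adam learning rate above decays as `learning_rate * (1/1.2) ** floor(step / 1879)`. A minimal, self-contained sketch of this control flow, with a hypothetical `make_batches` generator standing in for `AsyncDataReader`:

```python
import numpy as np
import paddle.fluid as fluid

place = fluid.CPUPlace()
main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    py_reader = fluid.layers.py_reader(
        capacity=4,
        shapes=([-1, 8], [-1, 1]),
        dtypes=['float32', 'int64'],
        name='toy_reader')
    feat, label = fluid.layers.read_file(py_reader)
    pred = fluid.layers.fc(input=feat, size=2, act='softmax')
    loss = fluid.layers.mean(
        fluid.layers.cross_entropy(input=pred, label=label))

def make_batches():  # hypothetical provider; AsyncDataReader plays this role above
    for _ in range(10):
        feat_t, label_t = fluid.LoDTensor(), fluid.LoDTensor()
        feat_t.set(np.random.rand(16, 8).astype('float32'), place)
        label_t.set(np.random.randint(0, 2, (16, 1)).astype('int64'), place)
        yield feat_t, label_t

py_reader.decorate_tensor_provider(make_batches)

exe = fluid.Executor(place)
exe.run(startup_prog)
for pass_id in range(2):
    py_reader.start()                    # arm the reader for this pass
    while True:
        try:
            exe.run(main_prog, fetch_list=[loss.name])
        except fluid.core.EOFException:  # provider exhausted
            py_reader.reset()            # re-arm for the next pass
            break
```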
@@ -15,6 +15,7 @@ import detection_out
 import normalize
 import select
 import crop
+import power
 import reduction
 #custom layer import ends
......
""" a custom layer for 'power', maybe we should implement this in standard way.
more info can be found here: http://caffe.berkeleyvision.org/tutorial/layers/power.html
"""
from .register import register
def power_shape(input_shape, shape=None):
""" calculate the output shape of this layer using input shape
Args:
@input_shape (list of num): a list of number which represents the input shape
Returns:
@output_shape (list of num): a list of numbers represent the output shape
"""
return input_shape
def power_layer(input, name, power=1.0, scale=1.0, shift=0.0):
""" build a layer of type 'Power' using fluid
Args:
@input (variables): input fluid variable for this layer
@name (str): name for this layer
@power (float): parameter from caffe's Power layer
@scale (float): parameter from caffe's Power layer
@shift (float): parameter from caffe's Power layer
Returns:
output (variable): output variable for this layer
"""
import paddle.fluid as fluid
scale_out = fluid.layers.scale(
input, scale=scale, bias=shift, bias_after_scale=True)
output = fluid.layers.pow(scale_out, factor=power)
return output
register(kind='Power', shape=power_shape, layer=power_layer)
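Caffe's Power layer computes y = (shift + scale * x) ^ power elementwise, which the two Fluid calls above reproduce: `scale` applies the affine part and `pow` the exponent. A quick numeric check of that composition, a sketch assuming only numpy:

```python
import numpy as np

def power_ref(x, power=1.0, scale=1.0, shift=0.0):
    # reference for caffe's Power layer: y = (shift + scale * x) ** power
    return (shift + scale * x) ** power

x = np.array([1.0, 2.0, 3.0])
# scale=2, shift=1, power=2: (1 + 2*x)^2 -> [9, 25, 49]
print(power_ref(x, power=2.0, scale=2.0, shift=1.0))
```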
@@ -31,12 +31,12 @@ def priorbox_shape(input_shapes, min_size, max_size=None, aspect_ratio=None):
 def priorbox_layer(inputs,
                    name,
                    min_size,
-                   step,
                    max_size=None,
                    aspect_ratio=None,
-                   flip=True,
+                   variance=[0.1, 0.1, 0.2, 0.2],
+                   flip=False,
                    clip=False,
-                   variance=[],
+                   step=0.0,
                    offset=0.5):
     """ build a layer of type 'Priorbox' using fluid
@@ -52,6 +52,8 @@ def priorbox_layer(inputs,
     assert len(inputs) == 2, "invalid inputs for Priorbox[%s]" % (name)
     input = inputs[0]
     image = inputs[1]
+    steps = tuple(step) if type(step) is list or type(step) is tuple else (step,
+                                                                           step)
     box, variance_ = fluid.layers.prior_box(
         input,
         image,
@@ -60,7 +62,8 @@ def priorbox_layer(inputs,
         aspect_ratio,
         variance,
         flip,
-        clip, (step, step),
+        clip,
+        steps,
         offset,
         min_max_aspect_ratios_order=True)
     """
......
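The `steps` normalization above simply promotes a scalar `step` to a symmetric `(step_h, step_w)` pair while passing an explicit list or tuple through unchanged. A minimal illustration, assuming nothing beyond the expression itself:

```python
def normalize_step(step):
    # scalar -> (step, step); list/tuple -> tuple(step)
    return tuple(step) if type(step) is list or type(step) is tuple else (step, step)

print(normalize_step(8))        # (8, 8)
print(normalize_step([8, 16]))  # (8, 16)
```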
@@ -38,7 +38,7 @@ LAYER_DESCRIPTORS = {
     'MultinomialLogisticLoss': shape_scalar,
     'MVN': shape_not_implemented,
     'Pooling': shape_pool,
-    'Power': shape_identity,
+    'Power': shape_power,
     'ReLU': shape_identity,
     'PReLU': shape_identity,
     'Scale': shape_identity,
......
@@ -280,8 +280,17 @@ class Network(object):
                 param_attr=fluid.ParamAttr(name=prefix + 'negslope'))
         return output

-    def pool(self, pool_type, input, k_h, k_w, s_h, s_w, ceil_mode, padding,
-             name):
+    def pool(self,
+             pool_type,
+             input,
+             k_h,
+             k_w,
+             s_h,
+             s_w,
+             ceil_mode,
+             padding,
+             name,
+             exclusive=True):
         # Get the number of channels in the input
         in_hw = input.shape[2:]
         k_hw = [k_h, k_w]
@@ -295,7 +304,8 @@ class Network(object):
             pool_stride=s_hw,
             pool_padding=padding,
             ceil_mode=ceil_mode,
-            pool_type=pool_type)
+            pool_type=pool_type,
+            exclusive=exclusive)
         return output

     @layer
......
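The new `exclusive` flag matters for average pooling near padded borders: with `exclusive=True` the divisor counts only cells inside the image, while `exclusive=False` divides by the full window size. A small numpy illustration of the difference on one padded corner window, under the assumption of a 2x2 window with one padded row and column:

```python
import numpy as np

window = np.array([[0.0, 0.0],   # padded cells (zeros)
                   [0.0, 4.0]])  # one real cell with value 4
valid_cells = 1                  # only one cell lies inside the image

inclusive_avg = window.sum() / window.size   # 4 / 4 = 1.0
exclusive_avg = window.sum() / valid_cells   # 4 / 1 = 4.0
print(inclusive_avg, exclusive_avg)
```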
@@ -67,6 +67,10 @@ def shape_crop(node):
     raise KaffeError('crop function had been defined in customer_layers')

+def shape_power(node):
+    raise KaffeError('power function had been defined in customer_layers')
+
 def shape_data(node):
     if node.output_shape:
         # Old-style input specification
......
@@ -26,6 +26,7 @@ def add_arguments():
 add_argument('dataset_path', str, None, "Cityscape dataset path.")
 add_argument('verbose', bool, False, "Print mIoU for each step if verbose.")
 add_argument('use_gpu', bool, True, "Whether use GPU or CPU.")
+add_argument('num_classes', int, 19, "Number of classes.")

 def mean_iou(pred, label):
@@ -69,7 +70,7 @@ tp = fluid.Program()
 batch_size = 1
 reader.default_config['crop_size'] = -1
 reader.default_config['shuffle'] = False
-num_classes = 19
+num_classes = args.num_classes

 with fluid.program_guard(tp, sp):
     img = fluid.layers.data(name='img', shape=[3, 0, 0], dtype='float32')
@@ -84,7 +85,7 @@ tp = tp.clone(True)
 fluid.memory_optimize(
     tp,
     print_log=False,
-    skip_opt_set=[pred.name, miou, out_wrong, out_correct],
+    skip_opt_set=set([pred.name, miou, out_wrong, out_correct]),
     level=1)

 place = fluid.CPUPlace()
......
@@ -20,6 +20,11 @@ op_results = {}
 default_epsilon = 1e-3
 default_norm_type = 'bn'
 default_group_number = 32
+depthwise_use_cudnn = False
+
+bn_regularizer = fluid.regularizer.L2DecayRegularizer(regularization_coeff=0.0)
+depthwise_regularizer = fluid.regularizer.L2DecayRegularizer(
+    regularization_coeff=0.0)

 @contextlib.contextmanager
@@ -52,20 +57,39 @@ def append_op_result(result, name):

 def conv(*args, **kargs):
-    kargs['param_attr'] = name_scope + 'weights'
+    if "xception" in name_scope:
+        init_std = 0.09
+    elif "logit" in name_scope:
+        init_std = 0.01
+    elif name_scope.endswith('depthwise/'):
+        init_std = 0.33
+    else:
+        init_std = 0.06
+    if name_scope.endswith('depthwise/'):
+        regularizer = depthwise_regularizer
+    else:
+        regularizer = None
+
+    kargs['param_attr'] = fluid.ParamAttr(
+        name=name_scope + 'weights',
+        regularizer=regularizer,
+        initializer=fluid.initializer.TruncatedNormal(
+            loc=0.0, scale=init_std))
     if 'bias_attr' in kargs and kargs['bias_attr']:
-        kargs['bias_attr'] = name_scope + 'biases'
+        kargs['bias_attr'] = fluid.ParamAttr(
+            name=name_scope + 'biases',
+            regularizer=regularizer,
+            initializer=fluid.initializer.ConstantInitializer(value=0.0))
     else:
         kargs['bias_attr'] = False
+    kargs['name'] = name_scope + 'conv'
     return append_op_result(fluid.layers.conv2d(*args, **kargs), 'conv')

 def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None):
-    helper = fluid.layer_helper.LayerHelper('group_norm', **locals())
-
     N, C, H, W = input.shape
     if C % G != 0:
-        print("group can not divide channle:", C, G)
+        # print "group can not divide channle:", C, G
         for d in range(10):
             for t in [d, -d]:
                 if G + t <= 0: continue
@@ -73,29 +97,16 @@ def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None):
                 G = G + t
                 break
             if C % G == 0:
-                print("use group size:", G)
+                # print "use group size:", G
                 break
     assert C % G == 0
-    param_shape = (G, )
-    x = input
-    x = fluid.layers.reshape(x, [N, G, C // G * H * W])
-    mean = fluid.layers.reduce_mean(x, dim=2, keep_dim=True)
-    x = x - mean
-    var = fluid.layers.reduce_mean(fluid.layers.square(x), dim=2, keep_dim=True)
-    x = x / fluid.layers.sqrt(var + eps)
-
-    scale = helper.create_parameter(
-        attr=helper.param_attr,
-        shape=param_shape,
-        dtype='float32',
-        default_initializer=fluid.initializer.Constant(1.0))
-    bias = helper.create_parameter(
-        attr=helper.bias_attr, shape=param_shape, dtype='float32', is_bias=True)
-    x = fluid.layers.elementwise_add(
-        fluid.layers.elementwise_mul(
-            x, scale, axis=1), bias, axis=1)
-    return fluid.layers.reshape(x, input.shape)
+    x = fluid.layers.group_norm(
+        input,
+        groups=G,
+        param_attr=param_attr,
+        bias_attr=bias_attr,
+        name=name_scope + 'group_norm')
+    return x

 def bn(*args, **kargs):
@@ -106,8 +117,10 @@ def bn(*args, **kargs):
             *args,
             epsilon=default_epsilon,
             momentum=bn_momentum,
-            param_attr=name_scope + 'gamma',
-            bias_attr=name_scope + 'beta',
+            param_attr=fluid.ParamAttr(
+                name=name_scope + 'gamma', regularizer=bn_regularizer),
+            bias_attr=fluid.ParamAttr(
+                name=name_scope + 'beta', regularizer=bn_regularizer),
             moving_mean_name=name_scope + 'moving_mean',
             moving_variance_name=name_scope + 'moving_variance',
             **kargs),
@@ -119,8 +132,10 @@ def bn(*args, **kargs):
             args[0],
             default_group_number,
             eps=default_epsilon,
-            param_attr=name_scope + 'gamma',
-            bias_attr=name_scope + 'beta'),
+            param_attr=fluid.ParamAttr(
+                name=name_scope + 'gamma', regularizer=bn_regularizer),
+            bias_attr=fluid.ParamAttr(
+                name=name_scope + 'beta', regularizer=bn_regularizer)),
             'gn')
     else:
         raise "Unsupport norm type:" + default_norm_type
@@ -143,7 +158,8 @@ def seq_conv(input, channel, stride, filter, dilation=1, act=None):
             stride,
             groups=input.shape[1],
             padding=(filter // 2) * dilation,
-            dilation=dilation)
+            dilation=dilation,
+            use_cudnn=depthwise_use_cudnn)
         input = bn(input)
         if act: input = act(input)
     with scope('pointwise'):
......
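The deleted hand-rolled implementation documents exactly what `fluid.layers.group_norm` computes: each sample's channels are split into G groups, each group is normalized by its own mean and variance, and a learned scale and shift are applied. A numpy reference, a minimal sketch of that math (with per-channel gamma/beta as in the replacement op, defaulting to 1 and 0):

```python
import numpy as np

def group_norm_ref(x, G, eps=1e-5, gamma=None, beta=None):
    # x: (N, C, H, W); normalize within each of the G channel groups
    N, C, H, W = x.shape
    assert C % G == 0
    g = x.reshape(N, G, C // G * H * W)
    mean = g.mean(axis=2, keepdims=True)
    var = ((g - mean) ** 2).mean(axis=2, keepdims=True)
    out = ((g - mean) / np.sqrt(var + eps)).reshape(N, C, H, W)
    gamma = np.ones(C) if gamma is None else gamma
    beta = np.zeros(C) if beta is None else beta
    return out * gamma.reshape(1, C, 1, 1) + beta.reshape(1, C, 1, 1)

x = np.random.randn(2, 64, 4, 4).astype('float32')
y = group_norm_ref(x, G=32)  # default_group_number above is 32
```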
@@ -13,6 +13,7 @@ import reader
 import models
 import time
+

 def add_argument(name, type, default, help):
     parser.add_argument('--' + name, default=default, type=type, help=help)
@@ -32,15 +33,28 @@ def add_arguments():
 add_argument('dataset_path', str, None, "Cityscape dataset path.")
 add_argument('parallel', bool, False, "using ParallelExecutor.")
 add_argument('use_gpu', bool, True, "Whether use GPU or CPU.")
+add_argument('num_classes', int, 19, "Number of classes.")

 def load_model():
+    myvars = [
+        x for x in tp.list_vars()
+        if isinstance(x, fluid.framework.Parameter) and x.name.find('logit') ==
+        -1
+    ]
     if args.init_weights_path.endswith('/'):
-        fluid.io.load_params(
-            exe, dirname=args.init_weights_path, main_program=tp)
+        if args.num_classes == 19:
+            fluid.io.load_params(
+                exe, dirname=args.init_weights_path, main_program=tp)
+        else:
+            fluid.io.load_vars(exe, dirname=args.init_weights_path, vars=myvars)
     else:
-        fluid.io.load_params(
-            exe, dirname="", filename=args.init_weights_path, main_program=tp)
+        if args.num_classes == 19:
+            fluid.io.load_params(
+                exe, dirname=args.init_weights_path, main_program=tp)
+        else:
+            fluid.io.load_vars(
+                exe, dirname="", filename=args.init_weights_path, vars=myvars)

 def save_model():
@@ -80,6 +94,7 @@ args = parser.parse_args()
 models.clean()
 models.bn_momentum = 0.9997
 models.dropout_keep_prop = 0.9
+models.label_number = args.num_classes
 deeplabv3p = models.deeplabv3p

 sp = fluid.Program()
@@ -89,7 +104,7 @@ batch_size = args.batch_size
 image_shape = [crop_size, crop_size]
 reader.default_config['crop_size'] = crop_size
 reader.default_config['shuffle'] = True
-num_classes = 19
+num_classes = args.num_classes
 weight_decay = 0.00004

 base_lr = args.base_lr
@@ -120,7 +135,7 @@ with fluid.program_guard(tp, sp):
     retv = opt.minimize(loss_mean, startup_program=sp, no_grad_set=no_grad_set)

 fluid.memory_optimize(
-    tp, print_log=False, skip_opt_set=[pred.name, loss_mean.name], level=1)
+    tp, print_log=False, skip_opt_set=set([pred.name, loss_mean.name]), level=1)

 place = fluid.CPUPlace()
 if args.use_gpu:
@@ -155,8 +170,8 @@ for i, imgs, labels, names in batches:
     if i % 100 == 0:
         print("Model is saved to", args.save_weights_path)
         save_model()
-    print("step {:d}, loss: {:.6f}, step_time_cost: {:.3f}" .format(i,
-          np.mean(retv[1]), end_time - prev_start_time))
+    print("step {:d}, loss: {:.6f}, step_time_cost: {:.3f}".format(
+        i, np.mean(retv[1]), end_time - prev_start_time))

 print("Training done. Model is saved to", args.save_weights_path)
 save_model()
@@ -111,10 +111,10 @@ Evaluation result is shown as below:

 | Model | RoI function | Batch size | Max iteration | mAP |
 | :--------------- | :--------: | :------------: | :------------------: |------: |
-| [Fluid RoIPool minibatch padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_minibatch_padding.tar.gz) | RoIPool | 8 | 180000 | 0.314 |
-| [Fluid RoIPool no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_no_padding.tar.gz) | RoIPool | 8 | 180000 | 0.316 |
-| [Fluid RoIAlign no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding.tar.gz) | RoIAlign | 8 | 180000 | 0.345 |
-| [Fluid RoIAlign no padding 2x](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding_2x.tar.gz) | RoIAlign | 8 | 360000 | 0.364 |
+| [Fluid RoIPool minibatch padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_minibatch_padding.tar.gz) | RoIPool | 8 | 180000 | 0.316 |
+| [Fluid RoIPool no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_no_padding.tar.gz) | RoIPool | 8 | 180000 | 0.318 |
+| [Fluid RoIAlign no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding.tar.gz) | RoIAlign | 8 | 180000 | 0.348 |
+| [Fluid RoIAlign no padding 2x](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding_2x.tar.gz) | RoIAlign | 8 | 360000 | 0.367 |

 * Fluid RoIPool minibatch padding: Use RoIPool. Images in one batch are padded to the same size. This method is the same as in Detectron.
 * Fluid RoIPool no padding: Images without padding.
......
@@ -105,10 +105,10 @@ Faster RCNN object detection model

 | Model | RoI function | Batch size | Iterations | mAP |
 | :--------------- | :--------: | :------------: | :------------------: |------: |
-| [Fluid RoIPool minibatch padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_minibatch_padding.tar.gz) | RoIPool | 8 | 180000 | 0.314 |
-| [Fluid RoIPool no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_no_padding.tar.gz) | RoIPool | 8 | 180000 | 0.316 |
-| [Fluid RoIAlign no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding.tar.gz) | RoIAlign | 8 | 180000 | 0.345 |
-| [Fluid RoIAlign no padding 2x](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding_2x.tar.gz) | RoIAlign | 8 | 360000 | 0.364 |
+| [Fluid RoIPool minibatch padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_minibatch_padding.tar.gz) | RoIPool | 8 | 180000 | 0.316 |
+| [Fluid RoIPool no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_no_padding.tar.gz) | RoIPool | 8 | 180000 | 0.318 |
+| [Fluid RoIAlign no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding.tar.gz) | RoIAlign | 8 | 180000 | 0.348 |
+| [Fluid RoIAlign no padding 2x](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding_2x.tar.gz) | RoIAlign | 8 | 360000 | 0.367 |
......
@@ -23,29 +23,43 @@ from PIL import ImageFont
 from config import cfg

-def box_decoder(target_box, prior_box, prior_box_var):
-    proposals = np.zeros_like(target_box, dtype=np.float32)
-    prior_box_loc = np.zeros_like(prior_box, dtype=np.float32)
-    prior_box_loc[:, 0] = prior_box[:, 2] - prior_box[:, 0] + 1.
-    prior_box_loc[:, 1] = prior_box[:, 3] - prior_box[:, 1] + 1.
-    prior_box_loc[:, 2] = (prior_box[:, 2] + prior_box[:, 0]) / 2
-    prior_box_loc[:, 3] = (prior_box[:, 3] + prior_box[:, 1]) / 2
-    pred_bbox = np.zeros_like(target_box, dtype=np.float32)
-    for i in range(prior_box.shape[0]):
-        dw = np.minimum(prior_box_var[2] * target_box[i, 2::4], cfg.bbox_clip)
-        dh = np.minimum(prior_box_var[3] * target_box[i, 3::4], cfg.bbox_clip)
-        pred_bbox[i, 0::4] = prior_box_var[0] * target_box[
-            i, 0::4] * prior_box_loc[i, 0] + prior_box_loc[i, 2]
-        pred_bbox[i, 1::4] = prior_box_var[1] * target_box[
-            i, 1::4] * prior_box_loc[i, 1] + prior_box_loc[i, 3]
-        pred_bbox[i, 2::4] = np.exp(dw) * prior_box_loc[i, 0]
-        pred_bbox[i, 3::4] = np.exp(dh) * prior_box_loc[i, 1]
-    proposals[:, 0::4] = pred_bbox[:, 0::4] - pred_bbox[:, 2::4] / 2
-    proposals[:, 1::4] = pred_bbox[:, 1::4] - pred_bbox[:, 3::4] / 2
-    proposals[:, 2::4] = pred_bbox[:, 0::4] + pred_bbox[:, 2::4] / 2 - 1
-    proposals[:, 3::4] = pred_bbox[:, 1::4] + pred_bbox[:, 3::4] / 2 - 1
-
-    return proposals
+def box_decoder(deltas, boxes, weights):
+    if boxes.shape[0] == 0:
+        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
+
+    boxes = boxes.astype(deltas.dtype, copy=False)
+
+    widths = boxes[:, 2] - boxes[:, 0] + 1.0
+    heights = boxes[:, 3] - boxes[:, 1] + 1.0
+    ctr_x = boxes[:, 0] + 0.5 * widths
+    ctr_y = boxes[:, 1] + 0.5 * heights
+
+    wx, wy, ww, wh = weights
+    dx = deltas[:, 0::4] * wx
+    dy = deltas[:, 1::4] * wy
+    dw = deltas[:, 2::4] * ww
+    dh = deltas[:, 3::4] * wh
+
+    # Prevent sending too large values into np.exp()
+    dw = np.minimum(dw, cfg.bbox_clip)
+    dh = np.minimum(dh, cfg.bbox_clip)
+
+    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
+    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
+    pred_w = np.exp(dw) * widths[:, np.newaxis]
+    pred_h = np.exp(dh) * heights[:, np.newaxis]
+
+    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
+    # x1
+    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
+    # y1
+    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
+    # x2 (note: "- 1" is correct; don't be fooled by the asymmetry)
+    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1
+    # y2 (note: "- 1" is correct; don't be fooled by the asymmetry)
+    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1
+
+    return pred_boxes

 def clip_tiled_boxes(boxes, im_shape):
@@ -73,7 +87,6 @@ def get_nmsed_box(rpn_rois, confs, locs, class_nums, im_info,
     variance_v = np.array(cfg.bbox_reg_weights)
     confs_v = np.array(confs)
     locs_v = np.array(locs)
-    rois = box_decoder(locs_v, rpn_rois_v, variance_v)
     im_results = [[] for _ in range(len(lod) - 1)]
     new_lod = [0]
     for i in range(len(lod) - 1):
@@ -81,9 +94,11 @@ def get_nmsed_box(rpn_rois, confs, locs, class_nums, im_info,
         end = lod[i + 1]
         if start == end:
             continue
-        rois_n = rois[start:end, :]
+        locs_n = locs_v[start:end, :]
+        rois_n = rpn_rois_v[start:end, :]
         rois_n = rois_n / im_info[i][2]
-        rois_n = clip_tiled_boxes(rois_n, im_info[i][:2])
+        rois_n = box_decoder(locs_n, rois_n, variance_v)
+        rois_n = clip_tiled_boxes(rois_n, im_info[i][:2] / im_info[i][2])
         cls_boxes = [[] for _ in range(class_nums)]
         scores_n = confs_v[start:end, :]
......
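Setting every delta to zero turns the decoder above into the identity, which is a handy sanity check: with dx = dy = dw = dh = 0 the predicted center and size equal the anchor's own, so the input box comes back unchanged. A small worked example of that arithmetic, a sketch using only numpy and the same formulas as the new `box_decoder`:

```python
import numpy as np

# one anchor box, x1 y1 x2 y2; width 10, height 20
x1, y1, x2, y2 = 10.0, 10.0, 19.0, 29.0
w, h = x2 - x1 + 1.0, y2 - y1 + 1.0        # 10, 20
cx, cy = x1 + 0.5 * w, y1 + 0.5 * h        # 15, 20

dx = dy = dw = dh = 0.0                    # zero deltas
pcx, pcy = dx * w + cx, dy * h + cy        # centers unchanged
pw, ph = np.exp(dw) * w, np.exp(dh) * h    # sizes unchanged (exp(0) == 1)

pred = [pcx - 0.5 * pw, pcy - 0.5 * ph,
        pcx + 0.5 * pw - 1, pcy + 0.5 * ph - 1]
print(pred)  # [10.0, 10.0, 19.0, 29.0] -> the original box
```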
@@ -59,7 +59,7 @@ python train.py \
        --model=SE_ResNeXt50_32x4d \
        --batch_size=32 \
        --total_images=1281167 \
-       --class_dim=1000
+       --class_dim=1000 \
        --image_shape=3,224,224 \
        --model_save_dir=output/ \
        --with_mem_opt=False \
@@ -80,8 +80,11 @@ python train.py \
 * **lr**: initialized learning rate. Default: 0.1.
 * **pretrained_model**: model path for pretraining. Default: None.
 * **checkpoint**: the checkpoint path to resume. Default: None.
+* **model_category**: the category of models, ("models"|"models_name"). Default: "models".
+
+Alternatively, training can be started by running ```run.sh```.

-**data reader introduction:** Data reader is defined in ```reader.py```. In [training stage](#training-a-model), random crop and flipping are used, while center crop is used in [evaluation](#inference) and [inference](#inference) stages. Supported data augmentation includes:
+**data reader introduction:** Data readers are defined in ```reader.py``` and ```reader_cv2.py```; using the CV2 reader can speed up data reading. In [training stage](#training-a-model), random crop and flipping are used, while center crop is used in [evaluation](#inference) and [inference](#inference) stages. Supported data augmentation includes:
 * rotation
 * color jitter
 * random crop
@@ -183,26 +186,33 @@ Test-12-score: [15.040644], class [386]

 ## Supported models and performances

+Models fall into two categories: models whose parameter names are specified in the model definition, and models whose are not. Generate a named model by setting ```model_category = models_name```.
+
 Models are trained by starting with learning rate ```0.1``` and decaying it by ```0.1``` after each pre-defined number of epochs, unless otherwise noted. Available top-1/top-5 validation accuracy on ImageNet 2012 is listed in the table. Pretrained models can be downloaded by clicking the related model names.

-|model | top-1/top-5 accuracy
-|- | -:
-|[AlexNet](http://paddle-imagenet-models.bj.bcebos.com/alexnet_model.tar) | 57.21%/79.72%
-|VGG11 | -
-|VGG13 | -
-|VGG16 | -
-|VGG19 | -
-|GoogleNet | -
-|InceptionV4 | -
-|MobileNet | -
-|[ResNet50](http://paddle-imagenet-models.bj.bcebos.com/resnet_50_model.tar) | 76.63%/93.10%
-|ResNet101 | -
-|ResNet152 | -
-|[SE_ResNeXt50_32x4d](http://paddle-imagenet-models.bj.bcebos.com/se_resnext_50_model.tar) | 78.33%/93.96%
-|SE_ResNeXt101_32x4d | -
-|SE_ResNeXt152_32x4d | -
-|DPN68 | -
-|DPN92 | -
-|DPN98 | -
-|DPN107 | -
-|DPN131 | -
+- Released models: specify parameter names
+
+|model | top-1/top-5 accuracy(PIL)| top-1/top-5 accuracy(CV2) |
+|- |:-: |:-:|
+|[AlexNet](http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.zip) | 56.71%/79.18% | 55.88%/78.65% |
+|[VGG11](https://paddle-imagenet-models-name.bj.bcebos.com/VGG11_pretrained.zip) | 69.22%/89.09% | 69.01%/88.90% |
+|[VGG13](https://paddle-imagenet-models-name.bj.bcebos.com/VGG13_pretrained.zip) | 70.14%/89.48% | 69.83%/89.13% |
+|[VGG16](https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_pretrained.zip) | 72.08%/90.63% | 71.65%/90.57% |
+|[VGG19](https://paddle-imagenet-models-name.bj.bcebos.com/VGG19_pretrained.zip) | 72.56%/90.83% | 72.32%/90.98% |
+|[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.zip) | 70.91%/89.54% | 70.51%/89.35% |
+|[ResNet50](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.zip) | 76.35%/92.80% | 76.22%/92.92% |
+|[ResNet101](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.zip) | 77.49%/93.57% | 77.56%/93.64% |
+|[ResNet152](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_pretrained.zip) | 78.12%/93.93% | 77.92%/93.87% |
+|[SE_ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNext50_32x4d_pretrained.zip) | 78.50%/94.01% | 78.44%/93.96% |
+|[SE_ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt101_32x4d_pretrained.zip) | 79.26%/94.22% | 79.12%/94.20% |
+
+- Released models: not specify parameter names
+
+|model | top-1/top-5 accuracy(PIL)| top-1/top-5 accuracy(CV2) |
+|- |:-: |:-:|
+|[ResNet152](http://paddle-imagenet-models.bj.bcebos.com/ResNet152_pretrained.zip) | 78.18%/93.93% | 78.11%/94.04% |
+|[SE_ResNeXt50_32x4d](http://paddle-imagenet-models.bj.bcebos.com/se_resnext_50_model.tar) | 78.32%/93.96% | 77.58%/93.73% |
@@ -58,7 +58,7 @@ python train.py \
        --model=SE_ResNeXt50_32x4d \
        --batch_size=32 \
        --total_images=1281167 \
-       --class_dim=1000
+       --class_dim=1000 \
        --image_shape=3,224,224 \
        --model_save_dir=output/ \
        --with_mem_opt=False \
@@ -79,8 +79,9 @@ python train.py \
 * **lr**: initialized learning rate. Default: 0.1.
 * **pretrained_model**: model path for pretraining. Default: None.
 * **checkpoint**: the checkpoint path to resume. Default: None.
+* **model_category**: the category of models, ("models"|"models_name"). Default: "models".

-**Data reader introduction:** The data reader is defined in ```reader.py```. In the [training stage](#training-a-model), the default augmentations are random crop and horizontal flip, while center crop is the default in the [evaluation](#inference) and [inference](#inference) stages. Currently supported augmentations are:
+**Data reader introduction:** The data readers are defined in ```reader.py``` and ```reader_cv2.py```. In general, the CV2 reader reads data faster, while the PIL reader gives slightly higher accuracy. In the [training stage](#training-a-model), the default augmentations are random crop and horizontal flip, while center crop is the default in the [evaluation](#inference) and [inference](#inference) stages. Currently supported augmentations are:
 * rotation
 * color jitter
 * random crop
@@ -183,27 +184,30 @@ Test-12-score: [15.040644], class [386]
 ```

 ## Supported models and performances

+There are two kinds of models: those with named parameters and those without. Train a model with named parameters by setting ```model_category = models_name```.
+
 The table lists the networks supported under the "models" directory, together with the top-1/top-5 accuracy of the trained models on the ImageNet-2012 validation set; unless otherwise noted, models are trained with an initial learning rate of ```0.1```, decayed by ```0.1``` after each pre-defined number of epochs. Pretrained models can be downloaded by clicking the corresponding model name.

-|model | top-1/top-5 accuracy
-|- | -:
-|[AlexNet](http://paddle-imagenet-models.bj.bcebos.com/alexnet_model.tar) | 57.21%/79.72%
-|VGG11 | -
-|VGG13 | -
-|VGG16 | -
-|VGG19 | -
-|GoogleNet | -
-|InceptionV4 | -
-|MobileNet | -
-|[ResNet50](http://paddle-imagenet-models.bj.bcebos.com/resnet_50_model.tar) | 76.63%/93.10%
-|ResNet101 | -
-|ResNet152 | -
-|[SE_ResNeXt50_32x4d](http://paddle-imagenet-models.bj.bcebos.com/se_resnext_50_model.tar) | 78.33%/93.96%
-|SE_ResNeXt101_32x4d | -
-|SE_ResNeXt152_32x4d | -
-|DPN68 | -
-|DPN92 | -
-|DPN98 | -
-|DPN107 | -
-|DPN131 | -
+- Released models: specify parameter names
+
+|model | top-1/top-5 accuracy(PIL)| top-1/top-5 accuracy(CV2) |
+|- |:-: |:-:|
+|[AlexNet](http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.zip) | 56.71%/79.18% | 55.88%/78.65% |
+|[VGG11](https://paddle-imagenet-models-name.bj.bcebos.com/VGG11_pretrained.zip) | 69.22%/89.09% | 69.01%/88.90% |
+|[VGG13](https://paddle-imagenet-models-name.bj.bcebos.com/VGG13_pretrained.zip) | 70.14%/89.48% | 69.83%/89.13% |
+|[VGG16](https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_pretrained.zip) | 72.08%/90.63% | 71.65%/90.57% |
+|[VGG19](https://paddle-imagenet-models-name.bj.bcebos.com/VGG19_pretrained.zip) | 72.56%/90.83% | 72.32%/90.98% |
+|[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.zip) | 70.91%/89.54% | 70.51%/89.35% |
+|[ResNet50](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.zip) | 76.35%/92.80% | 76.22%/92.92% |
+|[ResNet101](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.zip) | 77.49%/93.57% | 77.56%/93.64% |
+|[ResNet152](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_pretrained.zip) | 78.12%/93.93% | 77.92%/93.87% |
+|[SE_ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNext50_32x4d_pretrained.zip) | 78.50%/94.01% | 78.44%/93.96% |
+|[SE_ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt101_32x4d_pretrained.zip) | 79.26%/94.22% | 79.12%/94.20% |
+
+- Released models: not specify parameter names
+
+|model | top-1/top-5 accuracy(PIL)| top-1/top-5 accuracy(CV2) |
+|- |:-: |:-:|
+|[ResNet152](http://paddle-imagenet-models.bj.bcebos.com/ResNet152_pretrained.zip) | 78.18%/93.93% | 78.11%/94.04% |
+|[SE_ResNeXt50_32x4d](http://paddle-imagenet-models.bj.bcebos.com/se_resnext_50_model.tar) | 78.32%/93.96% | 77.58%/93.73% |
@@ -26,6 +26,7 @@ import six
 import sys
 sys.path.append("..")
 import models
+import utils
 from reader import train, val

 def parse_args():
@@ -149,13 +150,15 @@ def get_model(args, is_train, main_prog, startup_prog):
             lr = []
             lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
+            # NOTE: we put weight decay in layers config, and remove
+            # weight decay on bn layers, so don't add weight decay in
+            # optimizer config.
             optimizer = fluid.optimizer.Momentum(
-                learning_rate=models.learning_rate.lr_warmup(
+                learning_rate=utils.learning_rate.lr_warmup(
                     fluid.layers.piecewise_decay(
                         boundaries=bd, values=lr),
                     warmup_steps, start_lr, end_lr),
-                momentum=0.9,
-                regularization=fluid.regularizer.L2Decay(1e-4))
+                momentum=0.9)
             optimizer.minimize(avg_cost)

     batched_reader = None
@@ -175,6 +178,7 @@ def append_nccl2_prepare(trainer_id, startup_prog):
     for ip in worker_ips.split(","):
         worker_endpoints.append(':'.join([ip, port]))
     current_endpoint = os.getenv("PADDLE_CURRENT_IP") + ":" + port
+    num_trainers = len(worker_endpoints)

     config = fluid.DistributeTranspilerConfig()
     config.mode = "nccl2"
@@ -182,6 +186,7 @@ def append_nccl2_prepare(trainer_id, startup_prog):
     t.transpile(trainer_id, trainers=','.join(worker_endpoints),
                 current_endpoint=current_endpoint,
                 startup_program=startup_prog)
+    return num_trainers, trainer_id

 def dist_transpile(trainer_id, args, train_prog, startup_prog):
@@ -281,12 +286,12 @@ def test_single(exe, test_args, args, test_prog):

 def train_parallel(train_args, test_args, args, train_prog, test_prog,
-                   startup_prog, nccl_id_var, num_trainers, trainer_id):
+                   startup_prog, num_trainers, trainer_id):
     over_all_start = time.time()
     place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)

-    if nccl_id_var and trainer_id == 0:
-        #FIXME(wuyi): wait other trainer to start listening
+    if args.update_method == "nccl2" and trainer_id == 0:
+        #FIXME(typhoonzero): wait other trainer to start listening
         time.sleep(30)

     startup_exe = fluid.Executor(place)
@@ -398,8 +403,8 @@ def main():

     # the unique trainer id, starting from 0, needed by trainer
     # only
-    nccl_id_var, num_trainers, trainer_id = (
-        None, 1, int(os.getenv("PADDLE_TRAINER_ID", "0")))
+    num_trainers, trainer_id = (
+        1, int(os.getenv("PADDLE_TRAINER_ID", "0")))

     train_prog = fluid.Program()
     test_prog = fluid.Program()
@@ -418,7 +423,7 @@ def main():
                 "Must configure correct environments to run dist train.")
         all_args.extend([train_prog, test_prog, startup_prog])
         if os.getenv("PADDLE_TRAINING_ROLE") == "TRAINER":
-            all_args.extend([nccl_id_var, num_trainers, trainer_id])
+            all_args.extend([num_trainers, trainer_id])
             train_parallel(*all_args)
         elif os.getenv("PADDLE_TRAINING_ROLE") == "PSERVER":
             # start pserver with Executor
@@ -431,10 +436,10 @@ def main():
     all_args.extend([train_prog, test_prog, startup_prog])

     if args.update_method == "nccl2":
-        nccl_id_var, num_trainers, trainer_id = append_nccl2_prepare(
+        num_trainers, trainer_id = append_nccl2_prepare(
             trainer_id, startup_prog)
-    all_args.extend([nccl_id_var, num_trainers, trainer_id])
+    all_args.extend([num_trainers, trainer_id])
     train_parallel(*all_args)

 if __name__ == "__main__":
......
@@ -7,11 +7,13 @@ import time
 import sys
 import paddle
 import paddle.fluid as fluid
-import models
-import reader
+#import models
+import models_name as models
+#import reader_cv2 as reader
+import reader as reader
 import argparse
 import functools
-from models.learning_rate import cosine_decay
+from utils.learning_rate import cosine_decay
 from utility import add_arguments, print_arguments
 import math
......
from .alexnet import AlexNet from .alexnet import AlexNet
from .mobilenet import MobileNet from .mobilenet import MobileNet
from .mobilenet_v2 import MobileNetV2
from .googlenet import GoogleNet from .googlenet import GoogleNet
from .vgg import VGG11, VGG13, VGG16, VGG19 from .vgg import VGG11, VGG13, VGG16, VGG19
from .resnet import ResNet50, ResNet101, ResNet152 from .resnet import ResNet50, ResNet101, ResNet152
...@@ -7,4 +8,4 @@ from .resnet_dist import DistResNet ...@@ -7,4 +8,4 @@ from .resnet_dist import DistResNet
from .inception_v4 import InceptionV4 from .inception_v4 import InceptionV4
from .se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_32x4d from .se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_32x4d
from .dpn import DPN68, DPN92, DPN98, DPN107, DPN131 from .dpn import DPN68, DPN92, DPN98, DPN107, DPN131
import learning_rate from .shufflenet_v2 import ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0
...@@ -5,8 +5,8 @@ import os ...@@ -5,8 +5,8 @@ import os
import numpy as np import numpy as np
import time import time
import sys import sys
import math
import paddle.fluid as fluid import paddle.fluid as fluid
import math
__all__ = ["DPN", "DPN68", "DPN92", "DPN98", "DPN107", "DPN131"] __all__ = ["DPN", "DPN68", "DPN92", "DPN98", "DPN107", "DPN131"]
...@@ -62,7 +62,6 @@ class DPN(object): ...@@ -62,7 +62,6 @@ class DPN(object):
pool_padding=1, pool_padding=1,
pool_type='max') pool_type='max')
#conv2 - conv5
for gc in range(4): for gc in range(4):
bw = bws[gc] bw = bws[gc]
inc = inc_sec[gc] inc = inc_sec[gc]
......
...@@ -13,7 +13,7 @@ train_parameters = { ...@@ -13,7 +13,7 @@ train_parameters = {
"learning_strategy": { "learning_strategy": {
"name": "piecewise_decay", "name": "piecewise_decay",
"batch_size": 256, "batch_size": 256,
"epochs": [30, 60, 90], "epochs": [30, 70, 100],
"steps": [0.1, 0.01, 0.001, 0.0001] "steps": [0.1, 0.01, 0.001, 0.0001]
} }
} }
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
__all__ = ['MobileNetV2']
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [30, 60, 90],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
class MobileNetV2():
def __init__(self):
self.params = train_parameters
def net(self, input, class_dim=1000, scale=1.0):
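        # Each tuple below reads (t, c, n, s): expansion factor t, output
        # channels c, repeat count n, and stride s of the first repeat --
        # the bottleneck schedule from the MobileNetV2 paper.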
bottleneck_params_list = [
(1, 16, 1, 1),
(6, 24, 2, 2),
(6, 32, 3, 2),
(6, 64, 4, 2),
(6, 96, 3, 1),
(6, 160, 3, 2),
(6, 320, 1, 1),
]
input = self.conv_bn_layer(
input,
num_filters=int(32 * scale),
filter_size=3,
stride=2,
padding=1,
if_act=True)
in_c = int(32 * scale)
for layer_setting in bottleneck_params_list:
t, c, n, s = layer_setting
input = self.invresi_blocks(
input=input,
in_c=in_c,
t=t,
c=int(c * scale),
n=n,
s=s, )
in_c = int(c * scale)
input = self.conv_bn_layer(
input=input,
num_filters=int(1280 * scale) if scale > 1.0 else 1280,
filter_size=1,
stride=1,
padding=0,
if_act=True)
input = fluid.layers.pool2d(
input=input,
pool_size=7,
pool_stride=1,
pool_type='avg',
global_pooling=True)
output = fluid.layers.fc(input=input,
size=class_dim,
act='softmax',
param_attr=ParamAttr(initializer=MSRA()))
return output
def conv_bn_layer(self,
input,
filter_size,
num_filters,
stride,
padding,
channels=None,
num_groups=1,
use_cudnn=True,
if_act=True):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=ParamAttr(initializer=MSRA()),
bias_attr=False)
bn = fluid.layers.batch_norm(input=conv)
if if_act:
return fluid.layers.relu6(bn)
else:
return bn
def shortcut(self, input, data_residual):
return fluid.layers.elementwise_add(input, data_residual)
def inverted_residual_unit(self, input, num_in_filter, num_filters,
ifshortcut, stride, filter_size, padding,
expansion_factor):
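        # Inverted residual: 1x1 expansion conv, 3x3 depthwise conv, then a
        # linear 1x1 projection; the optional shortcut adds the block input.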
num_expfilter = int(round(num_in_filter * expansion_factor))
channel_expand = self.conv_bn_layer(
input=input,
num_filters=num_expfilter,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True)
bottleneck_conv = self.conv_bn_layer(
input=channel_expand,
num_filters=num_expfilter,
filter_size=filter_size,
stride=stride,
padding=padding,
num_groups=num_expfilter,
if_act=True,
use_cudnn=False)
linear_out = self.conv_bn_layer(
input=bottleneck_conv,
num_filters=num_filters,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=False)
if ifshortcut:
out = self.shortcut(input=input, data_residual=linear_out)
return out
else:
return linear_out
def invresi_blocks(self, input, in_c, t, c, n, s):
first_block = self.inverted_residual_unit(
input=input,
num_in_filter=in_c,
num_filters=c,
ifshortcut=False,
stride=s,
filter_size=3,
padding=1,
expansion_factor=t)
last_residual_block = first_block
last_c = c
for i in range(1, n):
last_residual_block = self.inverted_residual_unit(
input=last_residual_block,
num_in_filter=last_c,
num_filters=c,
ifshortcut=True,
stride=1,
filter_size=3,
padding=1,
expansion_factor=t)
return last_residual_block
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
__all__ = [
'ShuffleNetV2', 'ShuffleNetV2_x0_5', 'ShuffleNetV2_x1_0',
'ShuffleNetV2_x1_5', 'ShuffleNetV2_x2_0'
]
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [30, 60, 90],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
class ShuffleNetV2():
def __init__(self, scale=1.0):
self.params = train_parameters
self.scale = scale
def net(self, input, class_dim=1000):
scale = self.scale
stage_repeats = [4, 8, 4]
if scale == 0.5:
stage_out_channels = [-1, 24, 48, 96, 192, 1024]
elif scale == 1.0:
stage_out_channels = [-1, 24, 116, 232, 464, 1024]
elif scale == 1.5:
stage_out_channels = [-1, 24, 176, 352, 704, 1024]
elif scale == 2.0:
stage_out_channels = [-1, 24, 224, 488, 976, 2048]
else:
raise ValueError("""{} groups is not supported for
1x1 Grouped Convolutions""".format(num_groups))
#conv1
input_channel = stage_out_channels[1]
conv1 = self.conv_bn_layer(
input=input,
filter_size=3,
num_filters=input_channel,
padding=1,
stride=2)
pool1 = fluid.layers.pool2d(
input=conv1,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
conv = pool1
# bottleneck sequences
for idxstage in range(len(stage_repeats)):
numrepeat = stage_repeats[idxstage]
output_channel = stage_out_channels[idxstage + 2]
for i in range(numrepeat):
if i == 0:
conv = self.inverted_residual_unit(
input=conv,
num_filters=output_channel,
stride=2,
benchmodel=2)
else:
conv = self.inverted_residual_unit(
input=conv,
num_filters=output_channel,
stride=1,
benchmodel=1)
conv_last = self.conv_bn_layer(
input=conv,
filter_size=1,
num_filters=stage_out_channels[-1],
padding=0,
stride=1)
pool_last = fluid.layers.pool2d(
input=conv_last,
pool_size=7,
pool_stride=7,
pool_padding=0,
pool_type='avg')
output = fluid.layers.fc(input=pool_last,
size=class_dim,
act='softmax',
param_attr=ParamAttr(initializer=MSRA()))
return output
def conv_bn_layer(self,
input,
filter_size,
num_filters,
stride,
padding,
num_groups=1,
use_cudnn=True,
if_act=True):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=ParamAttr(initializer=MSRA()),
bias_attr=False)
if if_act:
return fluid.layers.batch_norm(input=conv, act='relu')
else:
return fluid.layers.batch_norm(input=conv)
def channel_shuffle(self, x, groups):
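        # Shuffle channels across branches: reshape to (N, groups, C//groups,
        # H, W), swap the group and channel axes, then flatten back to NCHW.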
batchsize, num_channels, height, width = x.shape[0], x.shape[
1], x.shape[2], x.shape[3]
channels_per_group = num_channels // groups
# reshape
x = fluid.layers.reshape(
x=x, shape=[batchsize, groups, channels_per_group, height, width])
x = fluid.layers.transpose(x=x, perm=[0, 2, 1, 3, 4])
# flatten
x = fluid.layers.reshape(
x=x, shape=[batchsize, num_channels, height, width])
return x
def inverted_residual_unit(self, input, num_filters, stride, benchmodel):
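        # benchmodel == 1: stride-1 unit that splits the channels and
        # transforms one half; benchmodel == 2: stride-2 downsampling unit
        # with two parallel branches. Both end with a channel shuffle.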
assert stride in [1, 2], \
"supported stride are {} but your stride is {}".format([1,2], stride)
oup_inc = num_filters // 2
inp = input.shape[1]
if benchmodel == 1:
x1, x2 = fluid.layers.split(
input,
num_or_sections=[input.shape[1] // 2, input.shape[1] // 2],
dim=1)
conv_pw = self.conv_bn_layer(
input=x2,
num_filters=oup_inc,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True)
conv_dw = self.conv_bn_layer(
input=conv_pw,
num_filters=oup_inc,
filter_size=3,
stride=stride,
padding=1,
num_groups=oup_inc,
if_act=False)
conv_linear = self.conv_bn_layer(
input=conv_dw,
num_filters=oup_inc,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True)
out = fluid.layers.concat([x1, conv_linear], axis=1)
else:
#branch1
conv_dw = self.conv_bn_layer(
input=input,
num_filters=inp,
filter_size=3,
stride=stride,
padding=1,
num_groups=inp,
if_act=False)
conv_linear_1 = self.conv_bn_layer(
input=conv_dw,
num_filters=oup_inc,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True)
#branch2
conv_pw = self.conv_bn_layer(
input=input,
num_filters=oup_inc,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True)
conv_dw = self.conv_bn_layer(
input=conv_pw,
num_filters=oup_inc,
filter_size=3,
stride=stride,
padding=1,
num_groups=oup_inc,
if_act=False)
conv_linear_2 = self.conv_bn_layer(
input=conv_dw,
num_filters=oup_inc,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True)
out = fluid.layers.concat([conv_linear_1, conv_linear_2], axis=1)
return self.channel_shuffle(out, 2)
def ShuffleNetV2_x0_5():
model = ShuffleNetV2(scale=0.5)
return model
def ShuffleNetV2_x1_0():
model = ShuffleNetV2(scale=1.0)
return model
def ShuffleNetV2_x1_5():
model = ShuffleNetV2(scale=1.5)
return model
def ShuffleNetV2_x2_0():
model = ShuffleNetV2(scale=2.0)
return model
from .alexnet import AlexNet
from .mobilenet import MobileNet
from .mobilenet_v2 import MobileNetV2
from .googlenet import GoogleNet
from .vgg import VGG11, VGG13, VGG16, VGG19
from .resnet import ResNet50, ResNet101, ResNet152
from .inception_v4 import InceptionV4
from .se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_32x4d
from .dpn import DPN68, DPN92, DPN98, DPN107, DPN131
from .shufflenet_v2 import ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0
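# Illustrative usage sketch (not part of this commit): every model class
# above exposes net(input, class_dim) returning a softmax output, plus a
# params dict with a suggested training schedule. Assuming the usual fluid
# image pipeline, wiring one up looks roughly like:
#
#   import paddle.fluid as fluid
#   from models import ResNet50
#
#   image = fluid.layers.data(name='image', shape=[3, 224, 224],
#                             dtype='float32')
#   label = fluid.layers.data(name='label', shape=[1], dtype='int64')
#   out = ResNet50().net(input=image, class_dim=1000)
#   cost = fluid.layers.mean(fluid.layers.cross_entropy(input=out,
#                                                       label=label))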
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import math
__all__ = ['AlexNet']
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [40, 70, 100],
"steps": [0.01, 0.001, 0.0001, 0.00001]
}
}
class AlexNet():
def __init__(self):
self.params = train_parameters
def net(self, input, class_dim=1000):
stdv = 1.0 / math.sqrt(input.shape[1] * 11 * 11)
layer_name = [
"conv1", "conv2", "conv3", "conv4", "conv5", "fc6", "fc7", "fc8"
]
conv1 = fluid.layers.conv2d(
input=input,
num_filters=64,
filter_size=11,
stride=4,
padding=2,
groups=1,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[0] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[0] + "_weights"))
pool1 = fluid.layers.pool2d(
input=conv1,
pool_size=3,
pool_stride=2,
pool_padding=0,
pool_type='max')
stdv = 1.0 / math.sqrt(pool1.shape[1] * 5 * 5)
conv2 = fluid.layers.conv2d(
input=pool1,
num_filters=192,
filter_size=5,
stride=1,
padding=2,
groups=1,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[1] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[1] + "_weights"))
pool2 = fluid.layers.pool2d(
input=conv2,
pool_size=3,
pool_stride=2,
pool_padding=0,
pool_type='max')
stdv = 1.0 / math.sqrt(pool2.shape[1] * 3 * 3)
conv3 = fluid.layers.conv2d(
input=pool2,
num_filters=384,
filter_size=3,
stride=1,
padding=1,
groups=1,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[2] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[2] + "_weights"))
stdv = 1.0 / math.sqrt(conv3.shape[1] * 3 * 3)
conv4 = fluid.layers.conv2d(
input=conv3,
num_filters=256,
filter_size=3,
stride=1,
padding=1,
groups=1,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[3] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[3] + "_weights"))
stdv = 1.0 / math.sqrt(conv4.shape[1] * 3 * 3)
conv5 = fluid.layers.conv2d(
input=conv4,
num_filters=256,
filter_size=3,
stride=1,
padding=1,
groups=1,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[4] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[4] + "_weights"))
pool5 = fluid.layers.pool2d(
input=conv5,
pool_size=3,
pool_stride=2,
pool_padding=0,
pool_type='max')
drop6 = fluid.layers.dropout(x=pool5, dropout_prob=0.5)
stdv = 1.0 / math.sqrt(drop6.shape[1] * drop6.shape[2] *
drop6.shape[3] * 1.0)
fc6 = fluid.layers.fc(
input=drop6,
size=4096,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[5] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[5] + "_weights"))
drop7 = fluid.layers.dropout(x=fc6, dropout_prob=0.5)
stdv = 1.0 / math.sqrt(drop7.shape[1] * 1.0)
fc7 = fluid.layers.fc(
input=drop7,
size=4096,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[6] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[6] + "_weights"))
stdv = 1.0 / math.sqrt(fc7.shape[1] * 1.0)
out = fluid.layers.fc(
input=fc7,
size=class_dim,
act='softmax',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[7] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[7] + "_weights"))
return out
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import time
import sys
import paddle.fluid as fluid
import math
from paddle.fluid.param_attr import ParamAttr
__all__ = ["DPN", "DPN68", "DPN92", "DPN98", "DPN107", "DPN131"]
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [30, 60, 90],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
class DPN(object):
def __init__(self, layers=68):
self.params = train_parameters
self.layers = layers
def net(self, input, class_dim=1000):
# get network args
args = self.get_net_args(self.layers)
bws = args['bw']
inc_sec = args['inc_sec']
        rs = args['r']
k_r = args['k_r']
k_sec = args['k_sec']
G = args['G']
init_num_filter = args['init_num_filter']
init_filter_size = args['init_filter_size']
init_padding = args['init_padding']
## define Dual Path Network
# conv1
conv1_x_1 = fluid.layers.conv2d(
input=input,
num_filters=init_num_filter,
filter_size=init_filter_size,
stride=2,
padding=init_padding,
groups=1,
act=None,
bias_attr=False,
name="conv1",
param_attr=ParamAttr(name="conv1_weights"), )
conv1_x_1 = fluid.layers.batch_norm(
input=conv1_x_1,
act='relu',
is_test=False,
name="conv1_bn",
param_attr=ParamAttr(name='conv1_bn_scale'),
bias_attr=ParamAttr('conv1_bn_offset'),
moving_mean_name='conv1_bn_mean',
moving_variance_name='conv1_bn_variance', )
convX_x_x = fluid.layers.pool2d(
input=conv1_x_1,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max',
name="pool1")
#conv2 - conv5
match_list, num = [], 0
for gc in range(4):
bw = bws[gc]
inc = inc_sec[gc]
R = (k_r * bw) // rs[gc]
if gc == 0:
_type1 = 'proj'
_type2 = 'normal'
match = 1
else:
_type1 = 'down'
_type2 = 'normal'
match = match + k_sec[gc - 1]
match_list.append(match)
convX_x_x = self.dual_path_factory(
convX_x_x, R, R, bw, inc, G, _type1, name="dpn" + str(match))
for i_ly in range(2, k_sec[gc] + 1):
num += 1
if num in match_list:
num += 1
convX_x_x = self.dual_path_factory(
convX_x_x, R, R, bw, inc, G, _type2, name="dpn" + str(num))
conv5_x_x = fluid.layers.concat(convX_x_x, axis=1)
conv5_x_x = fluid.layers.batch_norm(
input=conv5_x_x,
act='relu',
is_test=False,
name="final_concat_bn",
param_attr=ParamAttr(name='final_concat_bn_scale'),
bias_attr=ParamAttr('final_concat_bn_offset'),
moving_mean_name='final_concat_bn_mean',
moving_variance_name='final_concat_bn_variance', )
pool5 = fluid.layers.pool2d(
input=conv5_x_x,
pool_size=7,
pool_stride=1,
pool_padding=0,
pool_type='avg', )
stdv = 0.01
param_attr = fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv))
fc6 = fluid.layers.fc(input=pool5,
size=class_dim,
act='softmax',
param_attr=param_attr,
name="fc6")
return fc6
def get_net_args(self, layers):
if layers == 68:
k_r = 128
G = 32
k_sec = [3, 4, 12, 3]
inc_sec = [16, 32, 32, 64]
bw = [64, 128, 256, 512]
r = [64, 64, 64, 64]
init_num_filter = 10
init_filter_size = 3
init_padding = 1
elif layers == 92:
k_r = 96
G = 32
k_sec = [3, 4, 20, 3]
inc_sec = [16, 32, 24, 128]
bw = [256, 512, 1024, 2048]
r = [256, 256, 256, 256]
init_num_filter = 64
init_filter_size = 7
init_padding = 3
elif layers == 98:
k_r = 160
G = 40
k_sec = [3, 6, 20, 3]
inc_sec = [16, 32, 32, 128]
bw = [256, 512, 1024, 2048]
r = [256, 256, 256, 256]
init_num_filter = 96
init_filter_size = 7
init_padding = 3
elif layers == 107:
k_r = 200
G = 50
k_sec = [4, 8, 20, 3]
inc_sec = [20, 64, 64, 128]
bw = [256, 512, 1024, 2048]
r = [256, 256, 256, 256]
init_num_filter = 128
init_filter_size = 7
init_padding = 3
elif layers == 131:
k_r = 160
G = 40
k_sec = [4, 8, 28, 3]
inc_sec = [16, 32, 32, 128]
bw = [256, 512, 1024, 2048]
r = [256, 256, 256, 256]
init_num_filter = 128
init_filter_size = 7
init_padding = 3
else:
raise NotImplementedError
net_arg = {
'k_r': k_r,
'G': G,
'k_sec': k_sec,
'inc_sec': inc_sec,
'bw': bw,
'r': r
}
net_arg['init_num_filter'] = init_num_filter
net_arg['init_filter_size'] = init_filter_size
net_arg['init_padding'] = init_padding
return net_arg
def dual_path_factory(self,
data,
num_1x1_a,
num_3x3_b,
num_1x1_c,
inc,
G,
_type='normal',
name=None):
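        # One dual-path block: a residual path (elementwise add, width
        # num_1x1_c) and a dense path (concat, growing by inc channels).
        # _type selects a projection ('proj'), downsampling ('down') or a
        # plain ('normal') block.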
kw = 3
kh = 3
pw = (kw - 1) // 2
ph = (kh - 1) // 2
# type
        if _type == 'proj':
            key_stride = 1
            has_proj = True
        if _type == 'down':
            key_stride = 2
            has_proj = True
        if _type == 'normal':
            key_stride = 1
            has_proj = False
# PROJ
        if isinstance(data, list):
data_in = fluid.layers.concat([data[0], data[1]], axis=1)
else:
data_in = data
if has_proj:
c1x1_w = self.bn_ac_conv(
data=data_in,
num_filter=(num_1x1_c + 2 * inc),
kernel=(1, 1),
pad=(0, 0),
stride=(key_stride, key_stride),
name=name + "_match")
data_o1, data_o2 = fluid.layers.split(
c1x1_w,
num_or_sections=[num_1x1_c, 2 * inc],
dim=1,
name=name + "_match_conv_Slice")
else:
data_o1 = data[0]
data_o2 = data[1]
# MAIN
c1x1_a = self.bn_ac_conv(
data=data_in,
num_filter=num_1x1_a,
kernel=(1, 1),
pad=(0, 0),
name=name + "_conv1")
c3x3_b = self.bn_ac_conv(
data=c1x1_a,
num_filter=num_3x3_b,
kernel=(kw, kh),
pad=(pw, ph),
stride=(key_stride, key_stride),
num_group=G,
name=name + "_conv2")
c1x1_c = self.bn_ac_conv(
data=c3x3_b,
num_filter=(num_1x1_c + inc),
kernel=(1, 1),
pad=(0, 0),
name=name + "_conv3")
c1x1_c1, c1x1_c2 = fluid.layers.split(
c1x1_c,
num_or_sections=[num_1x1_c, inc],
dim=1,
name=name + "_conv3_Slice")
# OUTPUTS
summ = fluid.layers.elementwise_add(
x=data_o1, y=c1x1_c1, name=name + "_elewise")
dense = fluid.layers.concat(
[data_o2, c1x1_c2], axis=1, name=name + "_concat")
return [summ, dense]
def bn_ac_conv(self,
data,
num_filter,
kernel,
pad,
stride=(1, 1),
num_group=1,
name=None):
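        # Pre-activation ordering used by DPN: BatchNorm + ReLU first, then
        # the (optionally grouped) convolution.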
bn_ac = fluid.layers.batch_norm(
input=data,
act='relu',
is_test=False,
name=name + '.output.1',
param_attr=ParamAttr(name=name + '_bn_scale'),
bias_attr=ParamAttr(name + '_bn_offset'),
moving_mean_name=name + '_bn_mean',
moving_variance_name=name + '_bn_variance', )
bn_ac_conv = fluid.layers.conv2d(
input=bn_ac,
num_filters=num_filter,
filter_size=kernel,
stride=stride,
padding=pad,
groups=num_group,
act=None,
bias_attr=False,
param_attr=ParamAttr(name=name + "_weights"))
return bn_ac_conv
def DPN68():
model = DPN(layers=68)
return model
def DPN92():
    model = DPN(layers=92)
return model
def DPN98():
model = DPN(layers=98)
return model
def DPN107():
model = DPN(layers=107)
return model
def DPN131():
model = DPN(layers=131)
return model
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
__all__ = ['GoogleNet']
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [30, 70, 100],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
class GoogleNet():
def __init__(self):
self.params = train_parameters
def conv_layer(self,
input,
num_filters,
filter_size,
stride=1,
groups=1,
act=None,
name=None):
channels = input.shape[1]
stdv = (3.0 / (filter_size**2 * channels))**0.5
param_attr = ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=name + "_weights")
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
act=act,
param_attr=param_attr,
bias_attr=False,
name=name)
return conv
def xavier(self, channels, filter_size, name):
stdv = (3.0 / (filter_size**2 * channels))**0.5
param_attr = ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=name + "_weights")
return param_attr
def inception(self,
input,
channels,
filter1,
filter3R,
filter3,
filter5R,
filter5,
proj,
name=None):
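        # Four parallel branches -- 1x1, 1x1->3x3, 1x1->5x5 and 3x3 max
        # pool->1x1 projection -- concatenated along the channel axis.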
conv1 = self.conv_layer(
input=input,
num_filters=filter1,
filter_size=1,
stride=1,
act=None,
name="inception_" + name + "_1x1")
conv3r = self.conv_layer(
input=input,
num_filters=filter3R,
filter_size=1,
stride=1,
act=None,
name="inception_" + name + "_3x3_reduce")
conv3 = self.conv_layer(
input=conv3r,
num_filters=filter3,
filter_size=3,
stride=1,
act=None,
name="inception_" + name + "_3x3")
conv5r = self.conv_layer(
input=input,
num_filters=filter5R,
filter_size=1,
stride=1,
act=None,
name="inception_" + name + "_5x5_reduce")
conv5 = self.conv_layer(
input=conv5r,
num_filters=filter5,
filter_size=5,
stride=1,
act=None,
name="inception_" + name + "_5x5")
pool = fluid.layers.pool2d(
input=input,
pool_size=3,
pool_stride=1,
pool_padding=1,
pool_type='max')
convprj = fluid.layers.conv2d(
input=pool,
filter_size=1,
num_filters=proj,
stride=1,
padding=0,
name="inception_" + name + "_3x3_proj",
param_attr=ParamAttr(
name="inception_" + name + "_3x3_proj_weights"),
bias_attr=False)
cat = fluid.layers.concat(input=[conv1, conv3, conv5, convprj], axis=1)
cat = fluid.layers.relu(cat)
return cat
def net(self, input, class_dim=1000):
conv = self.conv_layer(
input=input,
num_filters=64,
filter_size=7,
stride=2,
act=None,
name="conv1")
pool = fluid.layers.pool2d(
input=conv, pool_size=3, pool_type='max', pool_stride=2)
conv = self.conv_layer(
input=pool,
num_filters=64,
filter_size=1,
stride=1,
act=None,
name="conv2_1x1")
conv = self.conv_layer(
input=conv,
num_filters=192,
filter_size=3,
stride=1,
act=None,
name="conv2_3x3")
pool = fluid.layers.pool2d(
input=conv, pool_size=3, pool_type='max', pool_stride=2)
ince3a = self.inception(pool, 192, 64, 96, 128, 16, 32, 32, "ince3a")
ince3b = self.inception(ince3a, 256, 128, 128, 192, 32, 96, 64,
"ince3b")
pool3 = fluid.layers.pool2d(
input=ince3b, pool_size=3, pool_type='max', pool_stride=2)
ince4a = self.inception(pool3, 480, 192, 96, 208, 16, 48, 64, "ince4a")
ince4b = self.inception(ince4a, 512, 160, 112, 224, 24, 64, 64,
"ince4b")
ince4c = self.inception(ince4b, 512, 128, 128, 256, 24, 64, 64,
"ince4c")
ince4d = self.inception(ince4c, 512, 112, 144, 288, 32, 64, 64,
"ince4d")
ince4e = self.inception(ince4d, 528, 256, 160, 320, 32, 128, 128,
"ince4e")
pool4 = fluid.layers.pool2d(
input=ince4e, pool_size=3, pool_type='max', pool_stride=2)
ince5a = self.inception(pool4, 832, 256, 160, 320, 32, 128, 128,
"ince5a")
ince5b = self.inception(ince5a, 832, 384, 192, 384, 48, 128, 128,
"ince5b")
pool5 = fluid.layers.pool2d(
input=ince5b, pool_size=7, pool_type='avg', pool_stride=7)
dropout = fluid.layers.dropout(x=pool5, dropout_prob=0.4)
out = fluid.layers.fc(input=dropout,
size=class_dim,
act='softmax',
param_attr=self.xavier(1024, 1, "out"),
name="out",
bias_attr=ParamAttr(name="out_offset"))
pool_o1 = fluid.layers.pool2d(
input=ince4a, pool_size=5, pool_type='avg', pool_stride=3)
conv_o1 = self.conv_layer(
input=pool_o1,
num_filters=128,
filter_size=1,
stride=1,
act=None,
name="conv_o1")
fc_o1 = fluid.layers.fc(input=conv_o1,
size=1024,
act='relu',
param_attr=self.xavier(2048, 1, "fc_o1"),
name="fc_o1",
bias_attr=ParamAttr(name="fc_o1_offset"))
dropout_o1 = fluid.layers.dropout(x=fc_o1, dropout_prob=0.7)
out1 = fluid.layers.fc(input=dropout_o1,
size=class_dim,
act='softmax',
param_attr=self.xavier(1024, 1, "out1"),
name="out1",
bias_attr=ParamAttr(name="out1_offset"))
pool_o2 = fluid.layers.pool2d(
input=ince4d, pool_size=5, pool_type='avg', pool_stride=3)
conv_o2 = self.conv_layer(
input=pool_o2,
num_filters=128,
filter_size=1,
stride=1,
act=None,
name="conv_o2")
fc_o2 = fluid.layers.fc(input=conv_o2,
size=1024,
act='relu',
param_attr=self.xavier(2048, 1, "fc_o2"),
name="fc_o2",
bias_attr=ParamAttr(name="fc_o2_offset"))
dropout_o2 = fluid.layers.dropout(x=fc_o2, dropout_prob=0.7)
out2 = fluid.layers.fc(input=dropout_o2,
size=class_dim,
act='softmax',
param_attr=self.xavier(1024, 1, "out2"),
name="out2",
bias_attr=ParamAttr(name="out2_offset"))
# last fc layer is "out"
return out, out1, out2
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import math
from paddle.fluid.param_attr import ParamAttr
__all__ = ['InceptionV4']
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [30, 60, 90],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
class InceptionV4():
def __init__(self):
self.params = train_parameters
def net(self, input, class_dim=1000):
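        # Stem, then 4 x inception-A, reduction-A, 7 x inception-B,
        # reduction-B and 3 x inception-C, following the Inception-v4 paper.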
x = self.inception_stem(input)
for i in range(4):
x = self.inceptionA(x, name=str(i + 1))
x = self.reductionA(x)
for i in range(7):
x = self.inceptionB(x, name=str(i + 1))
x = self.reductionB(x)
for i in range(3):
x = self.inceptionC(x, name=str(i + 1))
pool = fluid.layers.pool2d(
input=x, pool_size=8, pool_type='avg', global_pooling=True)
drop = fluid.layers.dropout(x=pool, dropout_prob=0.2)
stdv = 1.0 / math.sqrt(drop.shape[1] * 1.0)
out = fluid.layers.fc(
input=drop,
size=class_dim,
act='softmax',
param_attr=ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name="final_fc_weights"),
bias_attr=ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name="final_fc_offset"))
return out
def conv_bn_layer(self,
data,
num_filters,
filter_size,
stride=1,
padding=0,
groups=1,
act='relu',
name=None):
conv = fluid.layers.conv2d(
input=data,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=groups,
act=None,
param_attr=ParamAttr(name=name + "_weights"),
bias_attr=False,
name=name)
bn_name = name + "_bn"
return fluid.layers.batch_norm(
input=conv,
act=act,
name=bn_name,
param_attr=ParamAttr(name=bn_name + "_scale"),
bias_attr=ParamAttr(name=bn_name + "_offset"),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
def inception_stem(self, data, name=None):
conv = self.conv_bn_layer(
data, 32, 3, stride=2, act='relu', name="conv1_3x3_s2")
conv = self.conv_bn_layer(conv, 32, 3, act='relu', name="conv2_3x3_s1")
conv = self.conv_bn_layer(
conv, 64, 3, padding=1, act='relu', name="conv3_3x3_s1")
pool1 = fluid.layers.pool2d(
input=conv, pool_size=3, pool_stride=2, pool_type='max')
conv2 = self.conv_bn_layer(
conv, 96, 3, stride=2, act='relu', name="inception_stem1_3x3_s2")
concat = fluid.layers.concat([pool1, conv2], axis=1)
conv1 = self.conv_bn_layer(
concat, 64, 1, act='relu', name="inception_stem2_3x3_reduce")
conv1 = self.conv_bn_layer(
conv1, 96, 3, act='relu', name="inception_stem2_3x3")
conv2 = self.conv_bn_layer(
concat, 64, 1, act='relu', name="inception_stem2_1x7_reduce")
conv2 = self.conv_bn_layer(
conv2,
64, (7, 1),
padding=(3, 0),
act='relu',
name="inception_stem2_1x7")
conv2 = self.conv_bn_layer(
conv2,
64, (1, 7),
padding=(0, 3),
act='relu',
name="inception_stem2_7x1")
conv2 = self.conv_bn_layer(
conv2, 96, 3, act='relu', name="inception_stem2_3x3_2")
concat = fluid.layers.concat([conv1, conv2], axis=1)
conv1 = self.conv_bn_layer(
concat, 192, 3, stride=2, act='relu', name="inception_stem3_3x3_s2")
pool1 = fluid.layers.pool2d(
input=concat, pool_size=3, pool_stride=2, pool_type='max')
concat = fluid.layers.concat([conv1, pool1], axis=1)
return concat
def inceptionA(self, data, name=None):
pool1 = fluid.layers.pool2d(
input=data, pool_size=3, pool_padding=1, pool_type='avg')
conv1 = self.conv_bn_layer(
pool1, 96, 1, act='relu', name="inception_a" + name + "_1x1")
conv2 = self.conv_bn_layer(
data, 96, 1, act='relu', name="inception_a" + name + "_1x1_2")
conv3 = self.conv_bn_layer(
data, 64, 1, act='relu', name="inception_a" + name + "_3x3_reduce")
conv3 = self.conv_bn_layer(
conv3,
96,
3,
padding=1,
act='relu',
name="inception_a" + name + "_3x3")
conv4 = self.conv_bn_layer(
data,
64,
1,
act='relu',
name="inception_a" + name + "_3x3_2_reduce")
conv4 = self.conv_bn_layer(
conv4,
96,
3,
padding=1,
act='relu',
name="inception_a" + name + "_3x3_2")
conv4 = self.conv_bn_layer(
conv4,
96,
3,
padding=1,
act='relu',
name="inception_a" + name + "_3x3_3")
concat = fluid.layers.concat([conv1, conv2, conv3, conv4], axis=1)
return concat
def reductionA(self, data, name=None):
pool1 = fluid.layers.pool2d(
input=data, pool_size=3, pool_stride=2, pool_type='max')
conv2 = self.conv_bn_layer(
data, 384, 3, stride=2, act='relu', name="reduction_a_3x3")
conv3 = self.conv_bn_layer(
data, 192, 1, act='relu', name="reduction_a_3x3_2_reduce")
conv3 = self.conv_bn_layer(
conv3, 224, 3, padding=1, act='relu', name="reduction_a_3x3_2")
conv3 = self.conv_bn_layer(
conv3, 256, 3, stride=2, act='relu', name="reduction_a_3x3_3")
concat = fluid.layers.concat([pool1, conv2, conv3], axis=1)
return concat
def inceptionB(self, data, name=None):
pool1 = fluid.layers.pool2d(
input=data, pool_size=3, pool_padding=1, pool_type='avg')
conv1 = self.conv_bn_layer(
pool1, 128, 1, act='relu', name="inception_b" + name + "_1x1")
conv2 = self.conv_bn_layer(
data, 384, 1, act='relu', name="inception_b" + name + "_1x1_2")
conv3 = self.conv_bn_layer(
data, 192, 1, act='relu', name="inception_b" + name + "_1x7_reduce")
conv3 = self.conv_bn_layer(
conv3,
224, (1, 7),
padding=(0, 3),
act='relu',
name="inception_b" + name + "_1x7")
conv3 = self.conv_bn_layer(
conv3,
256, (7, 1),
padding=(3, 0),
act='relu',
name="inception_b" + name + "_7x1")
conv4 = self.conv_bn_layer(
data,
192,
1,
act='relu',
name="inception_b" + name + "_7x1_2_reduce")
conv4 = self.conv_bn_layer(
conv4,
192, (1, 7),
padding=(0, 3),
act='relu',
name="inception_b" + name + "_1x7_2")
conv4 = self.conv_bn_layer(
conv4,
224, (7, 1),
padding=(3, 0),
act='relu',
name="inception_b" + name + "_7x1_2")
conv4 = self.conv_bn_layer(
conv4,
224, (1, 7),
padding=(0, 3),
act='relu',
name="inception_b" + name + "_1x7_3")
conv4 = self.conv_bn_layer(
conv4,
256, (7, 1),
padding=(3, 0),
act='relu',
name="inception_b" + name + "_7x1_3")
concat = fluid.layers.concat([conv1, conv2, conv3, conv4], axis=1)
return concat
def reductionB(self, data, name=None):
pool1 = fluid.layers.pool2d(
input=data, pool_size=3, pool_stride=2, pool_type='max')
conv2 = self.conv_bn_layer(
data, 192, 1, act='relu', name="reduction_b_3x3_reduce")
conv2 = self.conv_bn_layer(
conv2, 192, 3, stride=2, act='relu', name="reduction_b_3x3")
conv3 = self.conv_bn_layer(
data, 256, 1, act='relu', name="reduction_b_1x7_reduce")
conv3 = self.conv_bn_layer(
conv3,
256, (1, 7),
padding=(0, 3),
act='relu',
name="reduction_b_1x7")
conv3 = self.conv_bn_layer(
conv3,
320, (7, 1),
padding=(3, 0),
act='relu',
name="reduction_b_7x1")
conv3 = self.conv_bn_layer(
conv3, 320, 3, stride=2, act='relu', name="reduction_b_3x3_2")
concat = fluid.layers.concat([pool1, conv2, conv3], axis=1)
return concat
def inceptionC(self, data, name=None):
pool1 = fluid.layers.pool2d(
input=data, pool_size=3, pool_padding=1, pool_type='avg')
conv1 = self.conv_bn_layer(
pool1, 256, 1, act='relu', name="inception_c" + name + "_1x1")
conv2 = self.conv_bn_layer(
data, 256, 1, act='relu', name="inception_c" + name + "_1x1_2")
conv3 = self.conv_bn_layer(
data, 384, 1, act='relu', name="inception_c" + name + "_1x1_3")
conv3_1 = self.conv_bn_layer(
conv3,
256, (1, 3),
padding=(0, 1),
act='relu',
name="inception_c" + name + "_1x3")
conv3_2 = self.conv_bn_layer(
conv3,
256, (3, 1),
padding=(1, 0),
act='relu',
name="inception_c" + name + "_3x1")
conv4 = self.conv_bn_layer(
data, 384, 1, act='relu', name="inception_c" + name + "_1x1_4")
conv4 = self.conv_bn_layer(
conv4,
448, (1, 3),
padding=(0, 1),
act='relu',
name="inception_c" + name + "_1x3_2")
conv4 = self.conv_bn_layer(
conv4,
512, (3, 1),
padding=(1, 0),
act='relu',
name="inception_c" + name + "_3x1_2")
conv4_1 = self.conv_bn_layer(
conv4,
256, (1, 3),
padding=(0, 1),
act='relu',
name="inception_c" + name + "_1x3_3")
conv4_2 = self.conv_bn_layer(
conv4,
256, (3, 1),
padding=(1, 0),
act='relu',
name="inception_c" + name + "_3x1_3")
concat = fluid.layers.concat(
[conv1, conv2, conv3_1, conv3_2, conv4_1, conv4_2], axis=1)
return concat
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
__all__ = ['MobileNet']
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [30, 60, 90],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
class MobileNet():
def __init__(self):
self.params = train_parameters
def net(self, input, class_dim=1000, scale=1.0):
# conv1: 112x112
input = self.conv_bn_layer(
input,
filter_size=3,
channels=3,
num_filters=int(32 * scale),
stride=2,
padding=1,
name="conv1")
# 56x56
input = self.depthwise_separable(
input,
num_filters1=32,
num_filters2=64,
num_groups=32,
stride=1,
scale=scale,
name="conv2_1")
input = self.depthwise_separable(
input,
num_filters1=64,
num_filters2=128,
num_groups=64,
stride=2,
scale=scale,
name="conv2_2")
# 28x28
input = self.depthwise_separable(
input,
num_filters1=128,
num_filters2=128,
num_groups=128,
stride=1,
scale=scale,
name="conv3_1")
input = self.depthwise_separable(
input,
num_filters1=128,
num_filters2=256,
num_groups=128,
stride=2,
scale=scale,
name="conv3_2")
# 14x14
input = self.depthwise_separable(
input,
num_filters1=256,
num_filters2=256,
num_groups=256,
stride=1,
scale=scale,
name="conv4_1")
input = self.depthwise_separable(
input,
num_filters1=256,
num_filters2=512,
num_groups=256,
stride=2,
scale=scale,
name="conv4_2")
# 14x14
for i in range(5):
input = self.depthwise_separable(
input,
num_filters1=512,
num_filters2=512,
num_groups=512,
stride=1,
scale=scale,
name="conv5" + "_" + str(i + 1))
# 7x7
input = self.depthwise_separable(
input,
num_filters1=512,
num_filters2=1024,
num_groups=512,
stride=2,
scale=scale,
name="conv5_6")
input = self.depthwise_separable(
input,
num_filters1=1024,
num_filters2=1024,
num_groups=1024,
stride=1,
scale=scale,
name="conv6")
input = fluid.layers.pool2d(
input=input,
pool_size=0,
pool_stride=1,
pool_type='avg',
global_pooling=True)
output = fluid.layers.fc(input=input,
size=class_dim,
act='softmax',
param_attr=ParamAttr(
initializer=MSRA(), name="fc7_weights"),
bias_attr=ParamAttr(name="fc7_offset"))
return output
def conv_bn_layer(self,
input,
filter_size,
num_filters,
stride,
padding,
channels=None,
num_groups=1,
act='relu',
use_cudnn=True,
name=None):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=ParamAttr(
initializer=MSRA(), name=name + "_weights"),
bias_attr=False)
bn_name = name + "_bn"
return fluid.layers.batch_norm(
input=conv,
act=act,
param_attr=ParamAttr(name=bn_name + "_scale"),
bias_attr=ParamAttr(name=bn_name + "_offset"),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
def depthwise_separable(self,
input,
num_filters1,
num_filters2,
num_groups,
stride,
scale,
name=None):
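        # Depthwise 3x3 conv (groups == input channels) followed by a 1x1
        # pointwise conv -- the factorization MobileNet is built on.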
depthwise_conv = self.conv_bn_layer(
input=input,
filter_size=3,
num_filters=int(num_filters1 * scale),
stride=stride,
padding=1,
num_groups=int(num_groups * scale),
use_cudnn=False,
name=name + "_dw")
pointwise_conv = self.conv_bn_layer(
input=depthwise_conv,
filter_size=1,
num_filters=int(num_filters2 * scale),
stride=1,
padding=0,
name=name + "_sep")
return pointwise_conv
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
__all__ = ['MobileNetV2']
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [30, 60, 90],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
class MobileNetV2():
def __init__(self):
self.params = train_parameters
def net(self, input, class_dim=1000, scale=1.0):
bottleneck_params_list = [
(1, 16, 1, 1),
(6, 24, 2, 2),
(6, 32, 3, 2),
(6, 64, 4, 2),
(6, 96, 3, 1),
(6, 160, 3, 2),
(6, 320, 1, 1),
]
#conv1
input = self.conv_bn_layer(
input,
num_filters=int(32 * scale),
filter_size=3,
stride=2,
padding=1,
if_act=True,
name='conv1_1')
# bottleneck sequences
i = 1
in_c = int(32 * scale)
for layer_setting in bottleneck_params_list:
t, c, n, s = layer_setting
i += 1
input = self.invresi_blocks(
input=input,
in_c=in_c,
t=t,
c=int(c * scale),
n=n,
s=s,
name='conv' + str(i))
in_c = int(c * scale)
#last_conv
input = self.conv_bn_layer(
input=input,
num_filters=int(1280 * scale) if scale > 1.0 else 1280,
filter_size=1,
stride=1,
padding=0,
if_act=True,
name='conv9')
input = fluid.layers.pool2d(
input=input,
pool_size=7,
pool_stride=1,
pool_type='avg',
global_pooling=True)
output = fluid.layers.fc(input=input,
size=class_dim,
act='softmax',
param_attr=ParamAttr(name='fc10_weights'),
bias_attr=ParamAttr(name='fc10_offset'))
return output
def conv_bn_layer(self,
input,
filter_size,
num_filters,
stride,
padding,
channels=None,
num_groups=1,
if_act=True,
name=None,
use_cudnn=True):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=ParamAttr(name=name + '_weights'),
bias_attr=False)
bn_name = name + '_bn'
bn = fluid.layers.batch_norm(
input=conv,
param_attr=ParamAttr(name=bn_name + "_scale"),
bias_attr=ParamAttr(name=bn_name + "_offset"),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
if if_act:
return fluid.layers.relu6(bn)
else:
return bn
def shortcut(self, input, data_residual):
return fluid.layers.elementwise_add(input, data_residual)
def inverted_residual_unit(self,
input,
num_in_filter,
num_filters,
ifshortcut,
stride,
filter_size,
padding,
expansion_factor,
name=None):
num_expfilter = int(round(num_in_filter * expansion_factor))
channel_expand = self.conv_bn_layer(
input=input,
num_filters=num_expfilter,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True,
name=name + '_expand')
bottleneck_conv = self.conv_bn_layer(
input=channel_expand,
num_filters=num_expfilter,
filter_size=filter_size,
stride=stride,
padding=padding,
num_groups=num_expfilter,
if_act=True,
name=name + '_dwise',
use_cudnn=False)
linear_out = self.conv_bn_layer(
input=bottleneck_conv,
num_filters=num_filters,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=False,
name=name + '_linear')
if ifshortcut:
out = self.shortcut(input=input, data_residual=linear_out)
return out
else:
return linear_out
def invresi_blocks(self, input, in_c, t, c, n, s, name=None):
first_block = self.inverted_residual_unit(
input=input,
num_in_filter=in_c,
num_filters=c,
ifshortcut=False,
stride=s,
filter_size=3,
padding=1,
expansion_factor=t,
name=name + '_1')
last_residual_block = first_block
last_c = c
for i in range(1, n):
last_residual_block = self.inverted_residual_unit(
input=last_residual_block,
num_in_filter=last_c,
num_filters=c,
ifshortcut=True,
stride=1,
filter_size=3,
padding=1,
expansion_factor=t,
name=name + '_' + str(i + 1))
return last_residual_block
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import math
from paddle.fluid.param_attr import ParamAttr
__all__ = ["ResNet", "ResNet50", "ResNet101", "ResNet152"]
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [30, 60, 90],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
class ResNet():
def __init__(self, layers=50):
self.params = train_parameters
self.layers = layers
def net(self, input, class_dim=1000):
layers = self.layers
supported_layers = [50, 101, 152]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(supported_layers, layers)
if layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_filters = [64, 128, 256, 512]
conv = self.conv_bn_layer(
input=input,
num_filters=64,
filter_size=7,
stride=2,
act='relu',
name="conv1")
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
for block in range(len(depth)):
for i in range(depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
conv = self.bottleneck_block(
input=conv,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
name=conv_name)
pool = fluid.layers.pool2d(
input=conv, pool_size=7, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
out = fluid.layers.fc(input=pool,
size=class_dim,
act='softmax',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv,
stdv)))
return out
def conv_bn_layer(self,
input,
num_filters,
filter_size,
stride=1,
groups=1,
act=None,
name=None):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
param_attr=ParamAttr(name=name + "_weights"),
bias_attr=False,
name=name + '.conv2d.output.1')
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
return fluid.layers.batch_norm(
input=conv,
act=act,
name=bn_name + '.output.1',
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance', )
def shortcut(self, input, ch_out, stride, name):
ch_in = input.shape[1]
if ch_in != ch_out or stride != 1:
return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
else:
return input
def bottleneck_block(self, input, num_filters, stride, name):
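        # Standard ResNet bottleneck: 1x1 reduce, 3x3, 1x1 expand (4x),
        # summed with a (possibly projected) shortcut and passed through ReLU.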
conv0 = self.conv_bn_layer(
input=input,
num_filters=num_filters,
filter_size=1,
act='relu',
name=name + "_branch2a")
conv1 = self.conv_bn_layer(
input=conv0,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu',
name=name + "_branch2b")
conv2 = self.conv_bn_layer(
input=conv1,
num_filters=num_filters * 4,
filter_size=1,
act=None,
name=name + "_branch2c")
short = self.shortcut(
input, num_filters * 4, stride, name=name + "_branch1")
return fluid.layers.elementwise_add(
x=short, y=conv2, act='relu', name=name + ".add.output.5")
def ResNet50():
model = ResNet(layers=50)
return model
def ResNet101():
model = ResNet(layers=101)
return model
def ResNet152():
model = ResNet(layers=152)
return model
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import math
from paddle.fluid.param_attr import ParamAttr
__all__ = [
"SE_ResNeXt", "SE_ResNeXt50_32x4d", "SE_ResNeXt101_32x4d",
"SE_ResNeXt152_32x4d"
]
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"dropout_seed": None,
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [40, 80, 100],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
class SE_ResNeXt():
def __init__(self, layers=50):
self.params = train_parameters
self.layers = layers
def net(self, input, class_dim=1000):
layers = self.layers
supported_layers = [50, 101, 152]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(supported_layers, layers)
if layers == 50:
cardinality = 32
reduction_ratio = 16
depth = [3, 4, 6, 3]
num_filters = [128, 256, 512, 1024]
conv = self.conv_bn_layer(
input=input,
num_filters=64,
filter_size=7,
stride=2,
act='relu',
name='conv1', )
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
elif layers == 101:
cardinality = 32
reduction_ratio = 16
depth = [3, 4, 23, 3]
num_filters = [128, 256, 512, 1024]
conv = self.conv_bn_layer(
input=input,
num_filters=64,
filter_size=7,
stride=2,
act='relu',
name="conv1", )
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
elif layers == 152:
cardinality = 64
reduction_ratio = 16
depth = [3, 8, 36, 3]
num_filters = [128, 256, 512, 1024]
conv = self.conv_bn_layer(
input=input,
num_filters=64,
filter_size=3,
stride=2,
act='relu',
name='conv1')
conv = self.conv_bn_layer(
input=conv,
num_filters=64,
filter_size=3,
stride=1,
act='relu',
name='conv2')
conv = self.conv_bn_layer(
input=conv,
num_filters=128,
filter_size=3,
stride=1,
act='relu',
name='conv3')
conv = fluid.layers.pool2d(
                input=conv, pool_size=3, pool_stride=2, pool_padding=1,
                pool_type='max')
n = 1 if layers == 50 or layers == 101 else 3
for block in range(len(depth)):
n += 1
for i in range(depth[block]):
conv = self.bottleneck_block(
input=conv,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
cardinality=cardinality,
reduction_ratio=reduction_ratio,
name=str(n) + '_' + str(i + 1))
pool = fluid.layers.pool2d(
input=conv, pool_size=7, pool_type='avg', global_pooling=True)
drop = fluid.layers.dropout(
x=pool, dropout_prob=0.5, seed=self.params['dropout_seed'])
stdv = 1.0 / math.sqrt(drop.shape[1] * 1.0)
out = fluid.layers.fc(
input=drop,
size=class_dim,
act='softmax',
param_attr=ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name='fc6_weights'),
bias_attr=ParamAttr(name='fc6_offset'))
return out
def shortcut(self, input, ch_out, stride, name):
ch_in = input.shape[1]
if ch_in != ch_out or stride != 1:
filter_size = 1
return self.conv_bn_layer(
input, ch_out, filter_size, stride, name='conv' + name + '_prj')
else:
return input
def bottleneck_block(self,
input,
num_filters,
stride,
cardinality,
reduction_ratio,
name=None):
conv0 = self.conv_bn_layer(
input=input,
num_filters=num_filters,
filter_size=1,
act='relu',
name='conv' + name + '_x1')
conv1 = self.conv_bn_layer(
input=conv0,
num_filters=num_filters,
filter_size=3,
stride=stride,
groups=cardinality,
act='relu',
name='conv' + name + '_x2')
conv2 = self.conv_bn_layer(
input=conv1,
num_filters=num_filters * 2,
filter_size=1,
act=None,
name='conv' + name + '_x3')
scale = self.squeeze_excitation(
input=conv2,
num_channels=num_filters * 2,
reduction_ratio=reduction_ratio,
name='fc' + name)
short = self.shortcut(input, num_filters * 2, stride, name=name)
return fluid.layers.elementwise_add(x=short, y=scale, act='relu')
def conv_bn_layer(self,
input,
num_filters,
filter_size,
stride=1,
groups=1,
act=None,
name=None):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
bias_attr=False,
param_attr=ParamAttr(name=name + '_weights'), )
bn_name = name + "_bn"
return fluid.layers.batch_norm(
input=conv,
act=act,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
def squeeze_excitation(self,
input,
num_channels,
reduction_ratio,
name=None):
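        # Squeeze-and-excitation: global average pool, FC reduction by
        # reduction_ratio with ReLU, FC back to num_channels with sigmoid,
        # then channel-wise rescaling of the block input.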
pool = fluid.layers.pool2d(
input=input, pool_size=0, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
squeeze = fluid.layers.fc(
input=pool,
size=num_channels // reduction_ratio,
act='relu',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=name + '_sqz_weights'),
bias_attr=ParamAttr(name=name + '_sqz_offset'))
stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
excitation = fluid.layers.fc(
input=squeeze,
size=num_channels,
act='sigmoid',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=name + '_exc_weights'),
bias_attr=ParamAttr(name=name + '_exc_offset'))
scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
return scale
def SE_ResNeXt50_32x4d():
model = SE_ResNeXt(layers=50)
return model
def SE_ResNeXt101_32x4d():
model = SE_ResNeXt(layers=101)
return model
def SE_ResNeXt152_32x4d():
model = SE_ResNeXt(layers=152)
return model
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
__all__ = [
'ShuffleNetV2', 'ShuffleNetV2_x0_5', 'ShuffleNetV2_x1_0',
'ShuffleNetV2_x1_5', 'ShuffleNetV2_x2_0'
]
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [30, 60, 90],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
class ShuffleNetV2():
def __init__(self, scale=1.0):
self.params = train_parameters
self.scale = scale
def net(self, input, class_dim=1000):
scale = self.scale
stage_repeats = [4, 8, 4]
if scale == 0.5:
stage_out_channels = [-1, 24, 48, 96, 192, 1024]
elif scale == 1.0:
stage_out_channels = [-1, 24, 116, 232, 464, 1024]
elif scale == 1.5:
stage_out_channels = [-1, 24, 176, 352, 704, 1024]
elif scale == 2.0:
stage_out_channels = [-1, 24, 224, 488, 976, 2048]
else:
raise ValueError("""{} groups is not supported for
1x1 Grouped Convolutions""".format(num_groups))
#conv1
input_channel = stage_out_channels[1]
conv1 = self.conv_bn_layer(
input=input,
filter_size=3,
num_filters=input_channel,
padding=1,
stride=2,
name='stage1_conv')
pool1 = fluid.layers.pool2d(
input=conv1,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
conv = pool1
# bottleneck sequences
for idxstage in range(len(stage_repeats)):
numrepeat = stage_repeats[idxstage]
output_channel = stage_out_channels[idxstage + 2]
for i in range(numrepeat):
if i == 0:
conv = self.inverted_residual_unit(
input=conv,
num_filters=output_channel,
stride=2,
benchmodel=2,
name=str(idxstage + 2) + '_' + str(i + 1))
else:
conv = self.inverted_residual_unit(
input=conv,
num_filters=output_channel,
stride=1,
benchmodel=1,
name=str(idxstage + 2) + '_' + str(i + 1))
conv_last = self.conv_bn_layer(
input=conv,
filter_size=1,
num_filters=stage_out_channels[-1],
padding=0,
stride=1,
name='conv5')
pool_last = fluid.layers.pool2d(
input=conv_last,
pool_size=7,
pool_stride=1,
pool_padding=0,
pool_type='avg')
output = fluid.layers.fc(input=pool_last,
size=class_dim,
act='softmax',
param_attr=ParamAttr(
initializer=MSRA(), name='fc6_weights'),
bias_attr=ParamAttr(name='fc6_offset'))
return output
def conv_bn_layer(self,
input,
filter_size,
num_filters,
stride,
padding,
num_groups=1,
use_cudnn=True,
if_act=True,
name=None):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=ParamAttr(
initializer=MSRA(), name=name + '_weights'),
bias_attr=False)
bn_name = name + '_bn'
if if_act:
return fluid.layers.batch_norm(
input=conv,
act='relu',
param_attr=ParamAttr(name=bn_name + "_scale"),
bias_attr=ParamAttr(name=bn_name + "_offset"),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
else:
return fluid.layers.batch_norm(
input=conv,
param_attr=ParamAttr(name=bn_name + "_scale"),
bias_attr=ParamAttr(name=bn_name + "_offset"),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
def channel_shuffle(self, x, groups):
batchsize, num_channels, height, width = x.shape[0], x.shape[
1], x.shape[2], x.shape[3]
channels_per_group = num_channels // groups
# reshape
x = fluid.layers.reshape(
x=x, shape=[batchsize, groups, channels_per_group, height, width])
x = fluid.layers.transpose(x=x, perm=[0, 2, 1, 3, 4])
# flatten
x = fluid.layers.reshape(
x=x, shape=[batchsize, num_channels, height, width])
return x
def inverted_residual_unit(self,
input,
num_filters,
stride,
benchmodel,
name=None):
assert stride in [1, 2], \
"supported stride are {} but your stride is {}".format([1,2], stride)
oup_inc = num_filters // 2
inp = input.shape[1]
if benchmodel == 1:
x1, x2 = fluid.layers.split(
input,
num_or_sections=[input.shape[1] // 2, input.shape[1] // 2],
dim=1)
# x1 = input[:, :(input.shape[1]//2), :, :]
# x2 = input[:, (input.shape[1]//2):, :, :]
conv_pw = self.conv_bn_layer(
input=x2,
num_filters=oup_inc,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True,
name='stage_' + name + '_conv1')
conv_dw = self.conv_bn_layer(
input=conv_pw,
num_filters=oup_inc,
filter_size=3,
stride=stride,
padding=1,
num_groups=oup_inc,
if_act=False,
name='stage_' + name + '_conv2')
conv_linear = self.conv_bn_layer(
input=conv_dw,
num_filters=oup_inc,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True,
name='stage_' + name + '_conv3')
out = fluid.layers.concat([x1, conv_linear], axis=1)
else:
#branch1
conv_dw_1 = self.conv_bn_layer(
input=input,
num_filters=inp,
filter_size=3,
stride=stride,
padding=1,
num_groups=inp,
if_act=False,
name='stage_' + name + '_conv4')
conv_linear_1 = self.conv_bn_layer(
input=conv_dw_1,
num_filters=oup_inc,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True,
name='stage_' + name + '_conv5')
#branch2
conv_pw_2 = self.conv_bn_layer(
input=input,
num_filters=oup_inc,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True,
name='stage_' + name + '_conv1')
conv_dw_2 = self.conv_bn_layer(
input=conv_pw_2,
num_filters=oup_inc,
filter_size=3,
stride=stride,
padding=1,
num_groups=oup_inc,
if_act=False,
name='stage_' + name + '_conv2')
conv_linear_2 = self.conv_bn_layer(
input=conv_dw_2,
num_filters=oup_inc,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True,
name='stage_' + name + '_conv3')
out = fluid.layers.concat([conv_linear_1, conv_linear_2], axis=1)
return self.channel_shuffle(out, 2)
def ShuffleNetV2_x0_5():
model = ShuffleNetV2(scale=0.5)
return model
def ShuffleNetV2_x1_0():
model = ShuffleNetV2(scale=1.0)
return model
def ShuffleNetV2_x1_5():
model = ShuffleNetV2(scale=1.5)
return model
def ShuffleNetV2_x2_0():
model = ShuffleNetV2(scale=2.0)
return model
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
__all__ = ["VGGNet", "VGG11", "VGG13", "VGG16", "VGG19"]
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [30, 60, 90],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
class VGGNet():
def __init__(self, layers=16):
self.params = train_parameters
self.layers = layers
def net(self, input, class_dim=1000):
layers = self.layers
vgg_spec = {
11: ([1, 1, 2, 2, 2]),
13: ([2, 2, 2, 2, 2]),
16: ([2, 2, 3, 3, 3]),
19: ([2, 2, 4, 4, 4])
}
assert layers in vgg_spec.keys(), \
"supported layers are {} but got {}".format(vgg_spec.keys(), layers)
nums = vgg_spec[layers]
conv1 = self.conv_block(input, 64, nums[0], name="conv1_")
conv2 = self.conv_block(conv1, 128, nums[1], name="conv2_")
conv3 = self.conv_block(conv2, 256, nums[2], name="conv3_")
conv4 = self.conv_block(conv3, 512, nums[3], name="conv4_")
conv5 = self.conv_block(conv4, 512, nums[4], name="conv5_")
fc_dim = 4096
fc_name = ["fc6", "fc7", "fc8"]
fc1 = fluid.layers.fc(
input=conv5,
size=fc_dim,
act='relu',
param_attr=fluid.param_attr.ParamAttr(name=fc_name[0] + "_weights"),
bias_attr=fluid.param_attr.ParamAttr(name=fc_name[0] + "_offset"))
fc1 = fluid.layers.dropout(x=fc1, dropout_prob=0.5)
fc2 = fluid.layers.fc(
input=fc1,
size=fc_dim,
act='relu',
param_attr=fluid.param_attr.ParamAttr(name=fc_name[1] + "_weights"),
bias_attr=fluid.param_attr.ParamAttr(name=fc_name[1] + "_offset"))
fc2 = fluid.layers.dropout(x=fc2, dropout_prob=0.5)
out = fluid.layers.fc(
input=fc2,
size=class_dim,
act='softmax',
param_attr=fluid.param_attr.ParamAttr(name=fc_name[2] + "_weights"),
bias_attr=fluid.param_attr.ParamAttr(name=fc_name[2] + "_offset"))
return out
def conv_block(self, input, num_filter, groups, name=None):
conv = input
for i in range(groups):
conv = fluid.layers.conv2d(
input=conv,
num_filters=num_filter,
filter_size=3,
stride=1,
padding=1,
act='relu',
param_attr=fluid.param_attr.ParamAttr(
name=name + str(i + 1) + "_weights"),
bias_attr=fluid.param_attr.ParamAttr(
name=name + str(i + 1) + "_offset"))
return fluid.layers.pool2d(
input=conv, pool_size=2, pool_type='max', pool_stride=2)
def VGG11():
model = VGGNet(layers=11)
return model
def VGG13():
model = VGGNet(layers=13)
return model
def VGG16():
model = VGGNet(layers=16)
return model
def VGG19():
model = VGGNet(layers=19)
return model
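Both model files above expose the same interface: build the wrapper object, then call its net method on an input variable inside a fluid program. A minimal hookup sketch (the variable names and shapes are illustrative, not part of either file):

import paddle.fluid as fluid

image = fluid.layers.data(name='image', shape=[3, 224, 224], dtype='float32')
model = VGG16()  # or, e.g., ShuffleNetV2_x1_0()
out = model.net(input=image, class_dim=1000)  # softmax probabilities, shape [-1, 1000]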
@@ -54,7 +54,7 @@ def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]):
     scale_min = min(scale[0], bound)
 
-    target_area = img.size[0] * img.size[1] * np.random.uniform(scale_min,
-                                                                scale_max)
+    target_area = img.shape[0] * img.shape[1] * np.random.uniform(scale_min,
+                                                                  scale_max)
     target_size = math.sqrt(target_area)
     w = int(target_size * w)
     h = int(target_size * h)
@@ -169,7 +169,12 @@ def _reader_creator(file_list,
 def train(data_dir=DATA_DIR):
     file_list = os.path.join(data_dir, 'train_list.txt')
     return _reader_creator(
-        file_list, 'train', shuffle=True, color_jitter=False, rotate=False, data_dir=data_dir)
+        file_list,
+        'train',
+        shuffle=True,
+        color_jitter=False,
+        rotate=False,
+        data_dir=data_dir)
 
 def val(data_dir=DATA_DIR):
...
import os
import math
import random
import functools
import numpy as np
import paddle
import cv2
import io
random.seed(0)
np.random.seed(0)
DATA_DIM = 224
THREAD = 8
BUF_SIZE = 102400
DATA_DIR = 'data/ILSVRC2012'
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
def rotate_image(img):
""" rotate_image """
(h, w) = img.shape[:2]
center = (w / 2, h / 2)
angle = np.random.randint(-10, 11)
M = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated = cv2.warpAffine(img, M, (w, h))
return rotated
def random_crop(img, size, scale=None, ratio=None):
""" random_crop """
scale = [0.08, 1.0] if scale is None else scale
ratio = [3. / 4., 4. / 3.] if ratio is None else ratio
aspect_ratio = math.sqrt(np.random.uniform(*ratio))
w = 1. * aspect_ratio
h = 1. / aspect_ratio
bound = min((float(img.shape[1]) / img.shape[0]) / (w**2),
(float(img.shape[0]) / img.shape[1]) / (h**2))
scale_max = min(scale[1], bound)
scale_min = min(scale[0], bound)
target_area = img.shape[0] * img.shape[1] * np.random.uniform(scale_min,
scale_max)
target_size = math.sqrt(target_area)
w = int(target_size * w)
h = int(target_size * h)
# img is a cv2/numpy array here, so index .shape (not PIL's .size) and keep
# the h x w window inside the image bounds
i = np.random.randint(0, img.shape[0] - h + 1)
j = np.random.randint(0, img.shape[1] - w + 1)
img = img[i:i + h, j:j + w, :]
resized = cv2.resize(img, (size, size))
return resized
def distort_color(img):
# color jitter is currently a no-op placeholder; the image is returned unchanged
return img
def resize_short(img, target_size):
""" resize_short """
percent = float(target_size) / min(img.shape[0], img.shape[1])
resized_width = int(round(img.shape[1] * percent))
resized_height = int(round(img.shape[0] * percent))
resized = cv2.resize(img, (resized_width, resized_height))
return resized
def crop_image(img, target_size, center):
""" crop_image """
height, width = img.shape[:2]
size = target_size
if center:
# integer division: slice indices must be ints under Python 3
w_start = (width - size) // 2
h_start = (height - size) // 2
else:
w_start = np.random.randint(0, width - size + 1)
h_start = np.random.randint(0, height - size + 1)
w_end = w_start + size
h_end = h_start + size
img = img[h_start:h_end, w_start:w_end, :]
return img
def process_image(sample,
mode,
color_jitter,
rotate,
crop_size=224,
mean=None,
std=None):
""" process_image """
mean = [0.485, 0.456, 0.406] if mean is None else mean
std = [0.229, 0.224, 0.225] if std is None else std
img_path = sample[0]
img = cv2.imread(img_path)
if mode == 'train':
if rotate:
img = rotate_image(img)
if crop_size > 0:
img = random_crop(img, crop_size)
if color_jitter:
img = distort_color(img)
# flip horizontally with probability 0.5
if np.random.randint(0, 2) == 1:
img = img[:, ::-1, :]
else:
if crop_size > 0:
img = resize_short(img, crop_size)
img = crop_image(img, target_size=crop_size, center=True)
# BGR (cv2 default) -> RGB, HWC -> CHW, then scale to [0, 1]
img = img[:, :, ::-1].astype('float32').transpose((2, 0, 1)) / 255
img_mean = np.array(mean).reshape((3, 1, 1))
img_std = np.array(std).reshape((3, 1, 1))
img -= img_mean
img /= img_std
if mode == 'train' or mode == 'val':
return (img, sample[1])
elif mode == 'test':
return (img, )
def image_mapper(**kwargs):
""" image_mapper """
return functools.partial(process_image, **kwargs)
def _reader_creator(file_list,
mode,
shuffle=False,
color_jitter=False,
rotate=False,
data_dir=DATA_DIR):
def reader():
with open(file_list) as flist:
full_lines = [line.strip() for line in flist]
if shuffle:
np.random.shuffle(full_lines)
if mode == 'train' and os.getenv('PADDLE_TRAINING_ROLE'):
# distributed mode if the env var `PADDLE_TRAINING_ROLE` exists
trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
trainer_count = int(os.getenv("PADDLE_TRAINERS", "1"))
per_node_lines = len(full_lines) // trainer_count
lines = full_lines[trainer_id * per_node_lines:(trainer_id + 1)
* per_node_lines]
print(
"read images from %d, length: %d, lines length: %d, total: %d"
% (trainer_id * per_node_lines, per_node_lines, len(lines),
len(full_lines)))
else:
lines = full_lines
for line in lines:
if mode == 'train' or mode == 'val':
img_path, label = line.split()
img_path = img_path.replace("JPEG", "jpeg")
img_path = os.path.join(data_dir, img_path)
yield img_path, int(label)
elif mode == 'test':
img_path = os.path.join(data_dir, line)
yield [img_path]
image_mapper = functools.partial(
process_image,
mode=mode,
color_jitter=color_jitter,
rotate=rotate,
crop_size=224)
reader = paddle.reader.xmap_readers(
image_mapper, reader, THREAD, BUF_SIZE, order=False)
return reader
def train(data_dir=DATA_DIR):
file_list = os.path.join(data_dir, 'train_list.txt')
return _reader_creator(
file_list,
'train',
shuffle=True,
color_jitter=False,
rotate=False,
data_dir=data_dir)
def val(data_dir=DATA_DIR):
file_list = os.path.join(data_dir, 'val_list.txt')
return _reader_creator(file_list, 'val', shuffle=False, data_dir=data_dir)
def test(data_dir=DATA_DIR):
file_list = os.path.join(data_dir, 'val_list.txt')
return _reader_creator(file_list, 'test', shuffle=False, data_dir=data_dir)
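The train/val/test entry points above return plain Python reader factories, so a consumer typically wraps them with paddle.batch. A minimal sketch, assuming the default data layout (the batch size is an arbitrary example):

import paddle

train_batches = paddle.batch(train(data_dir='data/ILSVRC2012'), batch_size=32)
for batch in train_batches():
    # each element of `batch` is a (CHW float32 image, int label) pair
    images, labels = zip(*batch)
    break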
# Hyperparameter configs
python train.py \
--model=SE_ResNeXt50_32x4d \
--batch_size=32 \
--total_images=1281167 \
--class_dim=1000 \
--image_shape=3,224,224 \
--model_save_dir=output/ \
--with_mem_opt=False \
--lr_strategy=piecewise_decay \
--lr=0.1
# >log_SE_ResNeXt50_32x4d.txt 2>&1 &
#AlexNet:
#python train.py \
# --model=AlexNet \
# --batch_size=256 \
# --total_images=1281167 \
# --class_dim=1000 \
# --image_shape=3,224,224 \
# --model_save_dir=output/ \
# --with_mem_opt=False \
# --lr_strategy=piecewise_decay \
# --num_epochs=120 \
# --lr=0.01
#VGG11:
#python train.py \
# --model=VGG11 \
# --batch_size=512 \
# --total_images=1281167 \
# --class_dim=1000 \
# --image_shape=3,224,224 \
# --model_save_dir=output/ \
# --with_mem_opt=False \
# --lr_strategy=piecewise_decay \
# --num_epochs=120 \
# --lr=0.1
#MobileNet v1:
#python train.py \
# --model=MobileNet \
# --batch_size=256 \
# --total_images=1281167 \
# --class_dim=1000 \
# --image_shape=3,224,224 \
# --model_save_dir=output/ \
# --with_mem_opt=False \
# --lr_strategy=piecewise_decay \
# --num_epochs=120 \
# --lr=0.1
#ResNet50:
#python train.py \
# --model=ResNet50 \
# --batch_size=256 \
# --total_images=1281167 \
# --class_dim=1000 \
# --image_shape=3,224,224 \
# --model_save_dir=output/ \
# --with_mem_opt=False \
# --lr_strategy=piecewise_decay \
# --num_epochs=120 \
# --lr=0.1
#ResNet101:
#python train.py \
# --model=ResNet101 \
# --batch_size=256 \
# --total_images=1281167 \
# --class_dim=1000 \
# --image_shape=3,224,224 \
# --model_save_dir=output/ \
# --with_mem_opt=False \
# --lr_strategy=piecewise_decay \
# --num_epochs=120 \
# --lr=0.1
#ResNet152:
#python train.py \
# --model=ResNet152 \
# --batch_size=256 \
# --total_images=1281167 \
# --image_shape=3,224,224 \
# --lr_strategy=piecewise_decay \
# --lr=0.1 \
# --num_epochs=120 \
# --l2_decay=1e-4 \(TODO)
#SE_ResNeXt50:
#python train.py \
# --model=SE_ResNeXt50 \
# --batch_size=400 \
# --total_images=1281167 \
# --image_shape=3,224,224 \
# --lr_strategy=cosine_decay \
# --lr=0.1 \
# --num_epochs=200 \
# --l2_decay=12e-5 \(TODO)
#SE_ResNeXt101:
#python train.py \
# --model=SE_ResNeXt101 \
# --batch_size=400 \
# --total_images=1281167 \
# --image_shape=3,224,224 \
# --lr_strategy=cosine_decay \
# --lr=0.1 \
# --num_epochs=200 \
# --l2_decay=15e-5 \(TODO)
#VGG11:
#python train.py \
# --model=VGG11 \
# --batch_size=512 \
# --total_images=1281167 \
# --image_shape=3,224,224 \
# --lr_strategy=cosine_decay \
# --lr=0.1 \
# --num_epochs=90 \
# --l2_decay=2e-4 \(TODO)
#VGG13:
#python train.py \
# --model=VGG13 \
# --batch_size=256 \
# --total_images=1281167 \
# --image_shape=3,224,224 \
# --lr_strategy=cosine_decay \
# --lr=0.01 \
# --num_epochs=90 \
# --l2_decay=3e-4 \(TODO)
@@ -13,8 +13,13 @@ import paddle.dataset.flowers as flowers
 import models
 import reader
 import argparse
-from models.learning_rate import cosine_decay
+import functools
+import subprocess
+import utils
+from utils.learning_rate import cosine_decay
 from utility import add_arguments, print_arguments
+import models
+import models_name
 
 parser = argparse.ArgumentParser(description=__doc__)
 add_arg = functools.partial(add_arguments, argparser=parser)
@@ -34,20 +39,25 @@ add_arg('lr_strategy', str, "piecewise_decay", "Set the learning rate
 add_arg('model', str, "SE_ResNeXt50_32x4d", "Set the network to use.")
 add_arg('enable_ce', bool, False, "If set True, enable continuous evaluation job.")
 add_arg('data_dir', str, "./data/ILSVRC2012", "The ImageNet dataset root dir.")
+add_arg('model_category', str, "models", "Whether to use models_name or not, valid value:'models','models_name'")
 # yapf: enable
 
-model_list = [m for m in dir(models) if "__" not in m]
+def set_models(model):
+    global models
+    if model == "models":
+        models = models
+    else:
+        models = models_name
 
 def optimizer_setting(params):
     ls = params["learning_strategy"]
     if ls["name"] == "piecewise_decay":
         if "total_images" not in params:
             total_images = 1281167
         else:
             total_images = params["total_images"]
 
         batch_size = ls["batch_size"]
         step = int(total_images / batch_size + 1)
@@ -60,6 +70,7 @@ def optimizer_setting(params):
                 boundaries=bd, values=lr),
             momentum=0.9,
             regularization=fluid.regularizer.L2Decay(1e-4))
+
     elif ls["name"] == "cosine_decay":
         if "total_images" not in params:
             total_images = 1281167
@@ -76,7 +87,29 @@ def optimizer_setting(params):
             learning_rate=cosine_decay(
                 learning_rate=lr, step_each_epoch=step, epochs=num_epochs),
             momentum=0.9,
-            regularization=fluid.regularizer.L2Decay(1e-4))
+            regularization=fluid.regularizer.L2Decay(4e-5))
+    elif ls["name"] == "exponential_decay":
+        if "total_images" not in params:
+            total_images = 1281167
+        else:
+            total_images = params["total_images"]
+        batch_size = ls["batch_size"]
+        step = int(total_images / batch_size + 1)
+        lr = params["lr"]
+        num_epochs = params["num_epochs"]
+        learning_decay_rate_factor = ls["learning_decay_rate_factor"]
+        num_epochs_per_decay = ls["num_epochs_per_decay"]
+        NUM_GPUS = 1
+
+        optimizer = fluid.optimizer.Momentum(
+            learning_rate=fluid.layers.exponential_decay(
+                learning_rate=lr * NUM_GPUS,
+                decay_steps=step * num_epochs_per_decay / NUM_GPUS,
+                decay_rate=learning_decay_rate_factor),
+            momentum=0.9,
+            regularization=fluid.regularizer.L2Decay(4e-5))
     else:
         lr = params["lr"]
         optimizer = fluid.optimizer.Momentum(
@@ -86,29 +119,16 @@ def optimizer_setting(params):
     return optimizer
 
-def train(args):
-    # parameters from arguments
+def net_config(image, label, model, args):
+    model_list = [m for m in dir(models) if "__" not in m]
+    assert args.model in model_list, "{} is not in lists: {}".format(
+        args.model, model_list)
+
     class_dim = args.class_dim
     model_name = args.model
-    checkpoint = args.checkpoint
-    pretrained_model = args.pretrained_model
-    with_memory_optimization = args.with_mem_opt
-    model_save_dir = args.model_save_dir
-    image_shape = [int(m) for m in args.image_shape.split(",")]
-
-    assert model_name in model_list, "{} is not in lists: {}".format(args.model,
-                                                                     model_list)
-    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-
-    # model definition
-    model = models.__dict__[model_name]()
 
     if args.enable_ce:
         assert model_name == "SE_ResNeXt50_32x4d"
-        fluid.default_startup_program().random_seed = 1000
         model.params["dropout_seed"] = 100
         class_dim = 102
@@ -132,42 +152,100 @@ def train(args):
     acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
     acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
-    test_program = fluid.default_main_program().clone(for_test=True)
+    return avg_cost, acc_top1, acc_top5
+
+def build_program(is_train, main_prog, startup_prog, args):
+    image_shape = [int(m) for m in args.image_shape.split(",")]
+    model_name = args.model
+    model_list = [m for m in dir(models) if "__" not in m]
+    assert model_name in model_list, "{} is not in lists: {}".format(
+        args.model, model_list)
+    model = models.__dict__[model_name]()
+    with fluid.program_guard(main_prog, startup_prog):
+        py_reader = fluid.layers.py_reader(
+            capacity=16,
+            shapes=[[-1] + image_shape, [-1, 1]],
+            lod_levels=[0, 0],
+            dtypes=["float32", "int64"],
+            use_double_buffer=True)
+        with fluid.unique_name.guard():
+            image, label = fluid.layers.read_file(py_reader)
+            avg_cost, acc_top1, acc_top5 = net_config(image, label, model, args)
+            avg_cost.persistable = True
+            acc_top1.persistable = True
+            acc_top5.persistable = True
+            if is_train:
+                params = model.params
+                params["total_images"] = args.total_images
+                params["lr"] = args.lr
+                params["num_epochs"] = args.num_epochs
+                params["learning_strategy"]["batch_size"] = args.batch_size
+                params["learning_strategy"]["name"] = args.lr_strategy
+
+                optimizer = optimizer_setting(params)
+                optimizer.minimize(avg_cost)
+
+    return py_reader, avg_cost, acc_top1, acc_top5
 
-    # parameters from model and arguments
-    params = model.params
-    params["total_images"] = args.total_images
-    params["lr"] = args.lr
-    params["num_epochs"] = args.num_epochs
-    params["learning_strategy"]["batch_size"] = args.batch_size
-    params["learning_strategy"]["name"] = args.lr_strategy
 
-    # initialize optimizer
-    optimizer = optimizer_setting(params)
-    opts = optimizer.minimize(avg_cost)
+def train(args):
+    # parameters from arguments
+    model_name = args.model
+    checkpoint = args.checkpoint
+    pretrained_model = args.pretrained_model
+    with_memory_optimization = args.with_mem_opt
+    model_save_dir = args.model_save_dir
+
+    startup_prog = fluid.Program()
+    train_prog = fluid.Program()
+    test_prog = fluid.Program()
+    if args.enable_ce:
+        startup_prog.random_seed = 1000
+        train_prog.random_seed = 1000
+
+    train_py_reader, train_cost, train_acc1, train_acc5 = build_program(
+        is_train=True,
+        main_prog=train_prog,
+        startup_prog=startup_prog,
+        args=args)
+    test_py_reader, test_cost, test_acc1, test_acc5 = build_program(
+        is_train=False,
+        main_prog=test_prog,
+        startup_prog=startup_prog,
+        args=args)
+    test_prog = test_prog.clone(for_test=True)
 
     if with_memory_optimization:
-        fluid.memory_optimize(fluid.default_main_program())
+        fluid.memory_optimize(train_prog)
+        fluid.memory_optimize(test_prog)
 
     place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
     exe = fluid.Executor(place)
-    exe.run(fluid.default_startup_program())
+    exe.run(startup_prog)
 
     if checkpoint is not None:
-        fluid.io.load_persistables(exe, checkpoint)
+        fluid.io.load_persistables(exe, checkpoint, main_program=train_prog)
 
     if pretrained_model:
         def if_exist(var):
             return os.path.exists(os.path.join(pretrained_model, var.name))
-        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
+        fluid.io.load_vars(
+            exe, pretrained_model, main_program=train_prog, predicate=if_exist)
 
-    train_batch_size = args.batch_size
-    test_batch_size = 16
+    visible_device = os.getenv('CUDA_VISIBLE_DEVICES')
+    if visible_device:
+        device_num = len(visible_device.split(','))
+    else:
+        device_num = subprocess.check_output(
+            ['nvidia-smi', '-L']).decode().count('\n')
+    train_batch_size = args.batch_size / device_num
+    test_batch_size = 8
 
     if not args.enable_ce:
-        train_reader = paddle.batch(reader.train(), batch_size=train_batch_size)
+        train_reader = paddle.batch(
+            reader.train(), batch_size=train_batch_size, drop_last=True)
        test_reader = paddle.batch(reader.val(), batch_size=test_batch_size)
    else:
        # use flowers dataset for CE and set use_xmap False to avoid disorder data
@@ -176,89 +254,105 @@ def train(args):
         random.seed(0)
         np.random.seed(0)
         train_reader = paddle.batch(
-            flowers.train(use_xmap=False), batch_size=train_batch_size)
+            flowers.train(use_xmap=False),
+            batch_size=train_batch_size,
+            drop_last=True)
         test_reader = paddle.batch(
             flowers.test(use_xmap=False), batch_size=test_batch_size)
 
-    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
+    train_py_reader.decorate_paddle_reader(train_reader)
+    test_py_reader.decorate_paddle_reader(test_reader)
 
     train_exe = fluid.ParallelExecutor(
-        use_cuda=True if args.use_gpu else False, loss_name=avg_cost.name)
+        main_program=train_prog,
+        use_cuda=bool(args.use_gpu),
+        loss_name=train_cost.name)
 
-    fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name]
+    train_fetch_list = [train_cost.name, train_acc1.name, train_acc5.name]
+    test_fetch_list = [test_cost.name, test_acc1.name, test_acc5.name]
 
-    gpu = os.getenv("CUDA_VISIBLE_DEVICES") or ""
-    gpu_nums = len(gpu.split(","))
+    params = models.__dict__[args.model]().params
     for pass_id in range(params["num_epochs"]):
+        train_py_reader.start()
+
         train_info = [[], [], []]
         test_info = [[], [], []]
         train_time = []
-        for batch_id, data in enumerate(train_reader()):
-            t1 = time.time()
-            loss, acc1, acc5 = train_exe.run(fetch_list, feed=feeder.feed(data))
-            t2 = time.time()
-            period = t2 - t1
-            loss = np.mean(np.array(loss))
-            acc1 = np.mean(np.array(acc1))
-            acc5 = np.mean(np.array(acc5))
-            train_info[0].append(loss)
-            train_info[1].append(acc1)
-            train_info[2].append(acc5)
-            train_time.append(period)
-            if batch_id % 10 == 0:
-                print("Pass {0}, trainbatch {1}, loss {2}, \
-                    acc1 {3}, acc5 {4} time {5}"
-                      .format(pass_id, \
-                              batch_id, loss, acc1, acc5, \
-                              "%2.2f sec" % period))
-                sys.stdout.flush()
+        batch_id = 0
+        try:
+            while True:
+                t1 = time.time()
+                loss, acc1, acc5 = train_exe.run(fetch_list=train_fetch_list)
+                t2 = time.time()
+                period = t2 - t1
+                loss = np.mean(np.array(loss))
+                acc1 = np.mean(np.array(acc1))
+                acc5 = np.mean(np.array(acc5))
+                train_info[0].append(loss)
+                train_info[1].append(acc1)
+                train_info[2].append(acc5)
+                train_time.append(period)
+                if batch_id % 10 == 0:
+                    print("Pass {0}, trainbatch {1}, loss {2}, \
+                        acc1 {3}, acc5 {4} time {5}"
+                          .format(pass_id, batch_id, loss, acc1, acc5,
+                                  "%2.2f sec" % period))
+                    sys.stdout.flush()
+                batch_id += 1
+        except fluid.core.EOFException:
+            train_py_reader.reset()
 
         train_loss = np.array(train_info[0]).mean()
         train_acc1 = np.array(train_info[1]).mean()
         train_acc5 = np.array(train_info[2]).mean()
         train_speed = np.array(train_time).mean() / train_batch_size
-        cnt = 0
-        for test_batch_id, data in enumerate(test_reader()):
-            t1 = time.time()
-            loss, acc1, acc5 = exe.run(test_program,
-                                       fetch_list=fetch_list,
-                                       feed=feeder.feed(data))
-            t2 = time.time()
-            period = t2 - t1
-            loss = np.mean(loss)
-            acc1 = np.mean(acc1)
-            acc5 = np.mean(acc5)
-            test_info[0].append(loss * len(data))
-            test_info[1].append(acc1 * len(data))
-            test_info[2].append(acc5 * len(data))
-            cnt += len(data)
-            if test_batch_id % 10 == 0:
-                print("Pass {0},testbatch {1},loss {2}, \
-                    acc1 {3},acc5 {4},time {5}"
-                      .format(pass_id, \
-                              test_batch_id, loss, acc1, acc5, \
-                              "%2.2f sec" % period))
-                sys.stdout.flush()
+
+        test_py_reader.start()
+        test_batch_id = 0
+        try:
+            while True:
+                t1 = time.time()
+                loss, acc1, acc5 = exe.run(program=test_prog,
+                                           fetch_list=test_fetch_list)
+                t2 = time.time()
+                period = t2 - t1
+                loss = np.mean(loss)
+                acc1 = np.mean(acc1)
+                acc5 = np.mean(acc5)
+                test_info[0].append(loss)
+                test_info[1].append(acc1)
+                test_info[2].append(acc5)
+                if test_batch_id % 10 == 0:
+                    print("Pass {0},testbatch {1},loss {2}, \
+                        acc1 {3},acc5 {4},time {5}"
+                          .format(pass_id, test_batch_id, loss, acc1, acc5,
+                                  "%2.2f sec" % period))
+                    sys.stdout.flush()
+                test_batch_id += 1
+        except fluid.core.EOFException:
+            test_py_reader.reset()
 
-        test_loss = np.sum(test_info[0]) / cnt
-        test_acc1 = np.sum(test_info[1]) / cnt
-        test_acc5 = np.sum(test_info[2]) / cnt
+        test_loss = np.array(test_info[0]).mean()
+        test_acc1 = np.array(test_info[1]).mean()
+        test_acc5 = np.array(test_info[2]).mean()
 
         print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, "
-              "test_loss {4}, test_acc1 {5}, test_acc5 {6}".format(pass_id, \
-              train_loss, train_acc1, train_acc5, test_loss, test_acc1, \
-              test_acc5))
+              "test_loss {4}, test_acc1 {5}, test_acc5 {6}".format(
                  pass_id, train_loss, train_acc1, train_acc5, test_loss,
+                  test_acc1, test_acc5))
         sys.stdout.flush()
 
         model_path = os.path.join(model_save_dir + '/' + model_name,
                                   str(pass_id))
         if not os.path.isdir(model_path):
             os.makedirs(model_path)
-        fluid.io.save_persistables(exe, model_path)
+        fluid.io.save_persistables(exe, model_path, main_program=train_prog)
 
         # This is for continuous evaluation only
         if args.enable_ce and pass_id == args.num_epochs - 1:
-            if gpu_nums == 1:
+            if device_num == 1:
                 # Use the mean cost/acc for training
                 print("kpis train_cost %s" % train_loss)
                 print("kpis train_acc_top1 %s" % train_acc1)
@@ -270,18 +364,24 @@ def train(args):
                 print("kpis train_speed %s" % train_speed)
             else:
                 # Use the mean cost/acc for training
-                print("kpis train_cost_card%s %s" % (gpu_nums, train_loss))
-                print("kpis train_acc_top1_card%s %s" % (gpu_nums, train_acc1))
-                print("kpis train_acc_top5_card%s %s" % (gpu_nums, train_acc5))
+                print("kpis train_cost_card%s %s" % (device_num, train_loss))
+                print("kpis train_acc_top1_card%s %s" %
+                      (device_num, train_acc1))
+                print("kpis train_acc_top5_card%s %s" %
+                      (device_num, train_acc5))
                 # Use the mean cost/acc for testing
-                print("kpis test_cost_card%s %s" % (gpu_nums, test_loss))
-                print("kpis test_acc_top1_card%s %s" % (gpu_nums, test_acc1))
-                print("kpis test_acc_top5_card%s %s" % (gpu_nums, test_acc5))
-                print("kpis train_speed_card%s %s" % (gpu_nums, train_speed))
+                print("kpis test_cost_card%s %s" % (device_num, test_loss))
+                print("kpis test_acc_top1_card%s %s" % (device_num, test_acc1))
+                print("kpis test_acc_top5_card%s %s" % (device_num, test_acc5))
+                print("kpis train_speed_card%s %s" % (device_num, train_speed))
 
 def main():
     args = parser.parse_args()
+    models_now = args.model_category
+    assert models_now in ["models", "models_name"], "{} is not in lists: {}".format(
+        models_now, ["models", "models_name"])
+    set_models(models_now)
     print_arguments(args)
     train(args)
...
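The new `--model_category` flag above decides which model package `set_models` activates at startup. An illustrative invocation (argument values are placeholders in the style of the configs listed earlier, not a recommended recipe):

python train.py \
    --model=ResNet50 \
    --model_category=models_name \
    --batch_size=256 \
    --lr_strategy=cosine_decay \
    --lr=0.1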
from .learning_rate import cosine_decay, lr_warmup
@@ -27,8 +27,8 @@ def lr_warmup(learning_rate, warmup_steps, start_lr, end_lr):
     Argument learning_rate can be float or a Variable
     lr = lr + (warmup_rate * step / warmup_steps)
     """
-    assert(isinstance(end_lr, float))
-    assert(isinstance(start_lr, float))
+    assert (isinstance(end_lr, float))
+    assert (isinstance(start_lr, float))
     linear_step = end_lr - start_lr
     with fluid.default_main_program()._lr_schedule_guard():
         lr = fluid.layers.tensor.create_global_var(
@@ -42,9 +42,10 @@ def lr_warmup(learning_rate, warmup_steps, start_lr, end_lr):
 
         with fluid.layers.control_flow.Switch() as switch:
             with switch.case(global_step < warmup_steps):
-                decayed_lr = start_lr + linear_step * (global_step / warmup_steps)
+                decayed_lr = start_lr + linear_step * (global_step /
+                                                       warmup_steps)
                 fluid.layers.tensor.assign(decayed_lr, lr)
             with switch.default():
                 fluid.layers.tensor.assign(learning_rate, lr)
 
         return lr
\ No newline at end of file
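A sketch of how the two exports above are meant to compose: warm the rate up linearly to the base value, then hand off to the cosine schedule. The concrete step counts below are made-up placeholders:

import paddle.fluid as fluid
from utils.learning_rate import cosine_decay, lr_warmup

base_lr = 0.1
step_each_epoch = 5000  # illustrative: images per epoch / batch size

decayed_lr = cosine_decay(
    learning_rate=base_lr, step_each_epoch=step_each_epoch, epochs=120)
warmed_lr = lr_warmup(
    decayed_lr, warmup_steps=step_each_epoch * 5, start_lr=0.0, end_lr=base_lr)
optimizer = fluid.optimizer.Momentum(learning_rate=warmed_lr, momentum=0.9)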
@@ -80,7 +80,7 @@
 During training, the prepared `train_images` and `train_list` are passed in through the `--train_images` and `--train_list` options respectively.
 
->**Note:** If neither `--train_images` nor `--train_list` is set (or both are None), ctc_reader.py automatically downloads the [sample data](http://paddle-ocr-data.bj.bcebos.com/data.tar.gz) and caches it under `$HOME/.cache/paddle/dataset/ctc_data/data/`.
+>**Note:** If neither `--train_images` nor `--train_list` is set (or both are None), reader.py automatically downloads the [sample data](http://paddle-ocr-data.bj.bcebos.com/data.tar.gz) and caches it under `$HOME/.cache/paddle/dataset/ctc_data/data/`.
 
 **B. Test and evaluation sets**
 
@@ -119,17 +119,17 @@ data/test_images/00003.jpg
 Train on a single GPU with the default data:
 
 ```
-env CUDA_VISIBLE_DEVICES=0 python ctc_train.py
+env CUDA_VISIBLE_DEVICES=0 python train.py
 ```
 
 Train on CPU with the default data:
 
 ```
-env OMP_NUM_THREADS=<num_of_physical_cores> python ctc_train.py --use_gpu False --parallel=False
+env OMP_NUM_THREADS=<num_of_physical_cores> python train.py --use_gpu False --parallel=False
 ```
 
 Train on multiple GPUs with the default data:
 
 ```
-env CUDA_VISIBLE_DEVICES=0,1,2,3 python ctc_train.py --parallel=True
+env CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --parallel=True
 ```
 
 The `CTC model` is used by default; it can be switched to the `attention model` with the option `--model="attention"`.
@@ -197,3 +197,10 @@ env CUDA_VISIBLE_DEVICE=0 python infer.py \
     --model_path="models/model_00044_15000" \
     --input_images_list="data/test.list"
 ```
+
+## Pretrained models
+
+|Model| Error rate|
+|- |:-: |
+|[ocr_ctc_params](https://drive.google.com/open?id=1gsg2ODO2_F2pswXwW5MXpf8RY8-BMRyZ) | 22.3% |
+|[ocr_attention_params](https://drive.google.com/open?id=1Bx7-94mngyTaMA5kVjzYHDPAdXxOYbRm) | 15.8%|
@@ -38,12 +38,10 @@ def infer(model_path, batch_size, test_data_file, vocab_file, target_file,
     for data in test_data():
         word = to_lodtensor([x[0] for x in data], place)
         mark = to_lodtensor([x[1] for x in data], place)
-        target = to_lodtensor([x[2] for x in data], place)
         crf_decode = exe.run(
             inference_program,
             feed={"word": word,
-                  "mark": mark,
-                  "target": target},
+                  "mark": mark},
             fetch_list=fetch_targets,
             return_numpy=False)
         lod_info = (crf_decode[0].lod())[0]
...
@@ -61,22 +61,21 @@ def main(train_data_file,
     avg_cost, feature_out, word, mark, target = ner_net(
         word_dict_len, label_dict_len, parallel)
 
-    sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
-    sgd_optimizer.minimize(avg_cost)
-
     crf_decode = fluid.layers.crf_decoding(
         input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))
 
     (precision, recall, f1_score, num_infer_chunks, num_label_chunks,
      num_correct_chunks) = fluid.layers.chunk_eval(
         input=crf_decode,
         label=target,
         chunk_scheme="IOB",
         num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))
     chunk_evaluator = fluid.metrics.ChunkEvaluator()
 
     inference_program = fluid.default_main_program().clone(for_test=True)
     test_fetch_list = [num_infer_chunks, num_label_chunks, num_correct_chunks]
+
+    sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
+    sgd_optimizer.minimize(avg_cost)
 
     if "CE_MODE_X" not in os.environ:
         train_reader = paddle.batch(
@@ -135,7 +134,7 @@ def main(train_data_file,
                   " pass_f1_score:" + str(test_pass_f1_score))
 
         save_dirname = os.path.join(model_save_dir, "params_pass_%d" % pass_id)
-        fluid.io.save_inference_model(save_dirname, ['word', 'mark', 'target'],
+        fluid.io.save_inference_model(save_dirname, ['word', 'mark'],
                                       crf_decode, exe)
 
 if "CE_MODE_X" in os.environ:
...
#!/bin/bash
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
cudaid=${text_matching_on_quora:=0} # use 0-th card as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce | python _ce.py
cudaid=${text_matching_on_quora_m:=0,1,2,3} # use 0,1,2,3 card as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce | python _ce.py
# this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi
each_pass_duration_card1_kpi = DurationKpi('each_pass_duration_card1', 0.05, 0, actived=True)
train_avg_cost_card1_kpi = CostKpi('train_avg_cost_card1', 0.2, 0)
train_avg_acc_card1_kpi = CostKpi('train_avg_acc_card1', 0.02, 0)
each_pass_duration_card4_kpi = DurationKpi('each_pass_duration_card4', 0.05, 0, actived=True)
train_avg_cost_card4_kpi = CostKpi('train_avg_cost_card4', 0.2, 0)
train_avg_acc_card4_kpi = CostKpi('train_avg_acc_card4', 0.02, 0)
tracking_kpis = [
each_pass_duration_card1_kpi,
train_avg_cost_card1_kpi,
train_avg_acc_card1_kpi,
each_pass_duration_card4_kpi,
train_avg_cost_card4_kpi,
train_avg_acc_card4_kpi,
]
def parse_log(log):
'''
This method should be implemented by model developers.
The suggestion: each KPI line in the log should hold three tab-separated
fields (the literal prefix "kpis", the KPI name, and its value), for example:
"
kpis\ttrain_avg_cost_card1\t1.0
kpis\ttrain_avg_acc_card1\t1.2
"
'''
for line in log.split('\n'):
fs = line.strip().split('\t')
print(fs)
if len(fs) == 3 and fs[0] == 'kpis':
kpi_name = fs[1]
kpi_value = float(fs[2])
yield kpi_name, kpi_value
def log_to_ce(log):
kpi_tracker = {}
for kpi in tracking_kpis:
kpi_tracker[kpi.name] = kpi
for (kpi_name, kpi_value) in parse_log(log):
print(kpi_name, kpi_value)
kpi_tracker[kpi_name].add_record(kpi_value)
kpi_tracker[kpi_name].persist()
if __name__ == '__main__':
log = sys.stdin.read()
log_to_ce(log)
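parse_log keeps only lines with exactly three tab-separated fields whose first field is "kpis", so the training script on the other end of the pipe is expected to print records like the following (the values here are made up):

print("kpis\ttrain_avg_cost_card1\t0.731")
print("kpis\ttrain_avg_acc_card1\t0.954")
print("kpis\teach_pass_duration_card1\t61.2")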
@@ -21,7 +21,12 @@ import numpy as np
 import time, datetime
 import os, sys
 
+def maybe_open(filepath):
+    if sys.version_info <= (3, 0):  # for python2
+        return open(filepath, 'r')
+    else:
+        return open(filepath, 'r', encoding="utf-8")
+
 def Glove840B_300D(filepath, keys=None):
     """
     input: the "glove.840B.300d.txt" file path
@@ -33,7 +38,7 @@ def Glove840B_300D(filepath, keys=None):
     print("please wait for a minute.")
     start = time.time()
     word2vec = {}
-    with open(filepath, "r") as f:
+    with maybe_open(filepath) as f:
         for line in f:
             if sys.version_info <= (3, 0):  # for python2
                 line = line.decode('utf-8')
...
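A hypothetical call to the loader above, assuming it returns the word2vec dict it builds; the file path and key set are placeholders:

word2vec = Glove840B_300D("data/glove.840B.300d.txt", keys={"quora", "question"})
vec = word2vec["question"]  # a 300-dimensional vector, if the word is in the file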
#!/bin/bash
wget http://www.statmt.org/lm-benchmark/1-billion-word-language-modeling-benchmark-r13output.tar.gz
tar -zxvf 1-billion-word-language-modeling-benchmark-r13output.tar.gz