Commit 6d1328c5 authored by Chen Weihang

add dataloader test scripts

Parent 109a3c75
#!/bin/bash
mkdir dataloader_test_log
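# Naming convention (inferred from the training scripts below, not stated in the
# commit itself): train.py converts batches to numpy arrays inside the training
# loop, train_sp.py reads through a single-process fluid.io.DataLoader, and
# train_mp.py reads through a multiprocess DataLoader (use_multiprocess=True).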
# single card
python ./mnist/train.py > dataloader_test_log/mnist 2>&1
python ./resnet/train.py > dataloader_test_log/resnet 2>&1
python ./se_resnet/train.py > dataloader_test_log/se_resnet 2>&1
python ./transformer/train.py > dataloader_test_log/transformer 2>&1
python ./mnist/train_sp.py > dataloader_test_log/mnist_sp 2>&1
python ./resnet/train_sp.py > dataloader_test_log/resnet_sp 2>&1
python ./se_resnet/train_sp.py > dataloader_test_log/se_resnet_sp 2>&1
python ./transformer/train_sp.py > dataloader_test_log/transformer_sp 2>&1
python ./mnist/train_mp.py > dataloader_test_log/mnist_mp 2>&1
python ./resnet/train_mp.py > dataloader_test_log/resnet_mp 2>&1
python ./se_resnet/train_mp.py > dataloader_test_log/se_resnet_mp 2>&1
python ./transformer/train_mp.py > dataloader_test_log/transformer_mp 2>&1
# multiple cards
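# Note (assumption, not part of the original script): paddle.distributed.launch
# should write one worker log per rank under the directory given by --log_dir,
# so no shell redirection is used for the multi-card runs.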
python -m paddle.distributed.launch --log_dir ./dataloader_test_log/mnist_8 ./mnist/train.py --use_data_parallel 1
python -m paddle.distributed.launch --log_dir ./dataloader_test_log/resnet_8 ./resnet/train.py --use_data_parallel 1
python -m paddle.distributed.launch --log_dir ./dataloader_test_log/se_resnet_8 ./se_resnet/train.py --use_data_parallel 1
python -m paddle.distributed.launch --log_dir ./dataloader_test_log/transformer_8 ./transformer/train.py --use_data_parallel 1
python -m paddle.distributed.launch --log_dir ./dataloader_test_log/mnist_8_sp ./mnist/train_sp.py --use_data_parallel 1
python -m paddle.distributed.launch --log_dir ./dataloader_test_log/resnet_8_sp ./resnet/train_sp.py --use_data_parallel 1
python -m paddle.distributed.launch --log_dir ./dataloader_test_log/se_resnet_8_sp ./se_resnet/train_sp.py --use_data_parallel 1
python -m paddle.distributed.launch --log_dir ./dataloader_test_log/transformer_8_sp ./transformer/train_sp.py --use_data_parallel 1
python -m paddle.distributed.launch --log_dir ./dataloader_test_log/mnist_8_mp ./mnist/train_mp.py --use_data_parallel 1
python -m paddle.distributed.launch --log_dir ./dataloader_test_log/resnet_8_mp ./resnet/train_mp.py --use_data_parallel 1
python -m paddle.distributed.launch --log_dir ./dataloader_test_log/se_resnet_8_mp ./se_resnet/train_mp.py --use_data_parallel 1
python -m paddle.distributed.launch --log_dir ./dataloader_test_log/transformer_8_mp ./transformer/train_mp.py --use_data_parallel 1
\ No newline at end of file
......@@ -15,6 +15,7 @@
from __future__ import print_function
import argparse
import ast
import time
import numpy as np
from PIL import Image
import os
......@@ -33,7 +34,7 @@ def parse_args():
default=False,
help="The flag indicating whether to use data parallel mode to train the model."
)
parser.add_argument("-e", "--epoch", default=5, type=int, help="set epoch")
parser.add_argument("-e", "--epoch", default=1, type=int, help="set epoch")
parser.add_argument("--ce", action="store_true", help="run ce")
args = parser.parse_args()
return args
......@@ -200,7 +201,9 @@ def train_mnist(args):
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=BATCH_SIZE, drop_last=True)
total_train_time = 0
for epoch in range(epoch_num):
stime = time.time()
for batch_id, data in enumerate(train_reader()):
dy_x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
......@@ -229,6 +232,7 @@ def train_mnist(args):
if batch_id % 100 == 0:
print("Loss at epoch {} step {}: {:}".format(
epoch, batch_id, avg_loss.numpy()))
total_train_time += (time.time() - stime)
mnist.eval()
test_cost, test_acc = test_mnist(test_reader, mnist, BATCH_SIZE)
......@@ -249,6 +253,7 @@ def train_mnist(args):
inference_mnist()
print("total train time: {} s".format(total_train_time))
if __name__ == '__main__':
args = parse_args()
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import argparse
import ast
import time
import numpy as np
from PIL import Image
import os
import paddle
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
from paddle.fluid.dygraph.base import to_variable
def parse_args():
parser = argparse.ArgumentParser("Training for Mnist.")
parser.add_argument(
"--use_data_parallel",
type=ast.literal_eval,
default=False,
help="The flag indicating whether to use data parallel mode to train the model."
)
parser.add_argument("-e", "--epoch", default=1, type=int, help="set epoch")
parser.add_argument("--ce", action="store_true", help="run ce")
args = parser.parse_args()
return args
class SimpleImgConvPool(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
pool_size,
pool_stride,
pool_padding=0,
pool_type='max',
global_pooling=False,
conv_stride=1,
conv_padding=0,
conv_dilation=1,
conv_groups=1,
act=None,
use_cudnn=False,
param_attr=None,
bias_attr=None):
super(SimpleImgConvPool, self).__init__()
self._conv2d = Conv2D(
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=conv_stride,
padding=conv_padding,
dilation=conv_dilation,
groups=conv_groups,
param_attr=None,
bias_attr=None,
act=act,
use_cudnn=use_cudnn)
self._pool2d = Pool2D(
pool_size=pool_size,
pool_type=pool_type,
pool_stride=pool_stride,
pool_padding=pool_padding,
global_pooling=global_pooling,
use_cudnn=use_cudnn)
def forward(self, inputs):
x = self._conv2d(inputs)
x = self._pool2d(x)
return x
class MNIST(fluid.dygraph.Layer):
def __init__(self):
super(MNIST, self).__init__()
self._simple_img_conv_pool_1 = SimpleImgConvPool(
1, 20, 5, 2, 2, act="relu")
self._simple_img_conv_pool_2 = SimpleImgConvPool(
20, 50, 5, 2, 2, act="relu")
self.pool_2_shape = 50 * 4 * 4
SIZE = 10
scale = (2.0 / (self.pool_2_shape**2 * SIZE))**0.5
self._fc = Linear(self.pool_2_shape, 10,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=scale)),
act="softmax")
def forward(self, inputs, label=None):
x = self._simple_img_conv_pool_1(inputs)
x = self._simple_img_conv_pool_2(x)
x = fluid.layers.reshape(x, shape=[-1, self.pool_2_shape])
x = self._fc(x)
if label is not None:
acc = fluid.layers.accuracy(input=x, label=label)
return x, acc
else:
return x
def reader_decorator(reader):
def __reader__():
for item in reader():
img = np.array(item[0]).astype('float32').reshape(1, 28, 28)
label = np.array(item[1]).astype('int64').reshape(1)
yield img, label
return __reader__
def test_mnist(reader, model, batch_size):
acc_set = []
avg_loss_set = []
for batch_id, data in enumerate(reader()):
img, label = data
label.stop_gradient = True
prediction, acc = model(img, label)
loss = fluid.layers.cross_entropy(input=prediction, label=label)
avg_loss = fluid.layers.mean(loss)
acc_set.append(float(acc.numpy()))
avg_loss_set.append(float(avg_loss.numpy()))
# get test acc and loss
acc_val_mean = np.array(acc_set).mean()
avg_loss_val_mean = np.array(avg_loss_set).mean()
return avg_loss_val_mean, acc_val_mean
def inference_mnist():
place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
if args.use_data_parallel else fluid.CUDAPlace(0)
with fluid.dygraph.guard(place):
mnist_infer = MNIST()
# load checkpoint
model_dict, _ = fluid.load_dygraph("save_temp")
mnist_infer.set_dict(model_dict)
print("checkpoint loaded")
# switch to evaluation mode
mnist_infer.eval()
def load_image(file):
im = Image.open(file).convert('L')
im = im.resize((28, 28), Image.ANTIALIAS)
im = np.array(im).reshape(1, 1, 28, 28).astype(np.float32)
im = im / 255.0 * 2.0 - 1.0
return im
cur_dir = os.path.dirname(os.path.realpath(__file__))
tensor_img = load_image(cur_dir + '/image/infer_3.png')
results = mnist_infer(to_variable(tensor_img))
lab = np.argsort(results.numpy())
print("Inference result of image/infer_3.png is: %d" % lab[0][-1])
def train_mnist(args):
epoch_num = args.epoch
BATCH_SIZE = 64
place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
if args.use_data_parallel else fluid.CUDAPlace(0)
with fluid.dygraph.guard(place):
if args.ce:
print("ce mode")
seed = 33
np.random.seed(seed)
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
if args.use_data_parallel:
strategy = fluid.dygraph.parallel.prepare_context()
mnist = MNIST()
adam = AdamOptimizer(learning_rate=0.001, parameter_list=mnist.parameters())
if args.use_data_parallel:
mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)
train_reader = paddle.batch(
reader_decorator(
paddle.dataset.mnist.train()),
batch_size=BATCH_SIZE,
drop_last=True)
if args.use_data_parallel:
train_reader = fluid.contrib.reader.distributed_batch_reader(
train_reader)
test_reader = paddle.batch(
reader_decorator(
paddle.dataset.mnist.test()),
batch_size=BATCH_SIZE,
drop_last=True)
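# Note added for clarity (not in the original): from_generator(capacity=10)
# prefetches up to 10 batches, and use_multiprocess=True moves data loading into
# a separate child process.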
train_loader = fluid.io.DataLoader.from_generator(capacity=10, use_multiprocess=True)
train_loader.set_sample_list_generator(train_reader, places=place)
test_loader = fluid.io.DataLoader.from_generator(capacity=10, use_multiprocess=True)
test_loader.set_sample_list_generator(test_reader, places=place)
total_train_time = 0
for epoch in range(epoch_num):
stime = time.time()
for batch_id, data in enumerate(train_loader()):
img, label = data
label.stop_gradient = True
cost, acc = mnist(img, label)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
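# Brief note (not in the original): in data-parallel mode, scale_loss() divides
# the loss by the trainer count and apply_collective_grads() all-reduces the
# gradients across devices before the optimizer step.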
if args.use_data_parallel:
avg_loss = mnist.scale_loss(avg_loss)
avg_loss.backward()
mnist.apply_collective_grads()
else:
avg_loss.backward()
adam.minimize(avg_loss)
# clear gradients after the optimizer step
mnist.clear_gradients()
if batch_id % 100 == 0:
print("Loss at epoch {} step {}: {:}".format(
epoch, batch_id, avg_loss.numpy()))
total_train_time += (time.time() - stime)
mnist.eval()
test_cost, test_acc = test_mnist(test_loader, mnist, BATCH_SIZE)
mnist.train()
if args.ce:
print("kpis\ttest_acc\t%s" % test_acc)
print("kpis\ttest_cost\t%s" % test_cost)
print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format(
epoch, test_cost, test_acc))
save_parameters = (not args.use_data_parallel) or (
args.use_data_parallel and
fluid.dygraph.parallel.Env().local_rank == 0)
if save_parameters:
fluid.save_dygraph(mnist.state_dict(), "save_temp")
print("checkpoint saved")
inference_mnist()
print("total train time: {} s".format(total_train_time))
if __name__ == '__main__':
args = parse_args()
train_mnist(args)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import argparse
import ast
import time
import numpy as np
from PIL import Image
import os
import paddle
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
from paddle.fluid.dygraph.base import to_variable
def parse_args():
parser = argparse.ArgumentParser("Training for Mnist.")
parser.add_argument(
"--use_data_parallel",
type=ast.literal_eval,
default=False,
help="The flag indicating whether to use data parallel mode to train the model."
)
parser.add_argument("-e", "--epoch", default=1, type=int, help="set epoch")
parser.add_argument("--ce", action="store_true", help="run ce")
args = parser.parse_args()
return args
class SimpleImgConvPool(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
pool_size,
pool_stride,
pool_padding=0,
pool_type='max',
global_pooling=False,
conv_stride=1,
conv_padding=0,
conv_dilation=1,
conv_groups=1,
act=None,
use_cudnn=False,
param_attr=None,
bias_attr=None):
super(SimpleImgConvPool, self).__init__()
self._conv2d = Conv2D(
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=conv_stride,
padding=conv_padding,
dilation=conv_dilation,
groups=conv_groups,
param_attr=None,
bias_attr=None,
act=act,
use_cudnn=use_cudnn)
self._pool2d = Pool2D(
pool_size=pool_size,
pool_type=pool_type,
pool_stride=pool_stride,
pool_padding=pool_padding,
global_pooling=global_pooling,
use_cudnn=use_cudnn)
def forward(self, inputs):
x = self._conv2d(inputs)
x = self._pool2d(x)
return x
class MNIST(fluid.dygraph.Layer):
def __init__(self):
super(MNIST, self).__init__()
self._simple_img_conv_pool_1 = SimpleImgConvPool(
1, 20, 5, 2, 2, act="relu")
self._simple_img_conv_pool_2 = SimpleImgConvPool(
20, 50, 5, 2, 2, act="relu")
self.pool_2_shape = 50 * 4 * 4
SIZE = 10
scale = (2.0 / (self.pool_2_shape**2 * SIZE))**0.5
self._fc = Linear(self.pool_2_shape, 10,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=scale)),
act="softmax")
def forward(self, inputs, label=None):
x = self._simple_img_conv_pool_1(inputs)
x = self._simple_img_conv_pool_2(x)
x = fluid.layers.reshape(x, shape=[-1, self.pool_2_shape])
x = self._fc(x)
if label is not None:
acc = fluid.layers.accuracy(input=x, label=label)
return x, acc
else:
return x
def reader_decorator(reader):
def __reader__():
for item in reader():
img = np.array(item[0]).astype('float32').reshape(1, 28, 28)
label = np.array(item[1]).astype('int64').reshape(1)
yield img, label
return __reader__
def test_mnist(reader, model, batch_size):
acc_set = []
avg_loss_set = []
for batch_id, data in enumerate(reader()):
img, label = data
label.stop_gradient = True
prediction, acc = model(img, label)
loss = fluid.layers.cross_entropy(input=prediction, label=label)
avg_loss = fluid.layers.mean(loss)
acc_set.append(float(acc.numpy()))
avg_loss_set.append(float(avg_loss.numpy()))
# get test acc and loss
acc_val_mean = np.array(acc_set).mean()
avg_loss_val_mean = np.array(avg_loss_set).mean()
return avg_loss_val_mean, acc_val_mean
def inference_mnist():
place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
if args.use_data_parallel else fluid.CUDAPlace(0)
with fluid.dygraph.guard(place):
mnist_infer = MNIST()
# load checkpoint
model_dict, _ = fluid.load_dygraph("save_temp")
mnist_infer.set_dict(model_dict)
print("checkpoint loaded")
# switch to evaluation mode
mnist_infer.eval()
def load_image(file):
im = Image.open(file).convert('L')
im = im.resize((28, 28), Image.ANTIALIAS)
im = np.array(im).reshape(1, 1, 28, 28).astype(np.float32)
im = im / 255.0 * 2.0 - 1.0
return im
cur_dir = os.path.dirname(os.path.realpath(__file__))
tensor_img = load_image(cur_dir + '/image/infer_3.png')
results = mnist_infer(to_variable(tensor_img))
lab = np.argsort(results.numpy())
print("Inference result of image/infer_3.png is: %d" % lab[0][-1])
def train_mnist(args):
epoch_num = args.epoch
BATCH_SIZE = 64
place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
if args.use_data_parallel else fluid.CUDAPlace(0)
with fluid.dygraph.guard(place):
if args.ce:
print("ce mode")
seed = 33
np.random.seed(seed)
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
if args.use_data_parallel:
strategy = fluid.dygraph.parallel.prepare_context()
mnist = MNIST()
adam = AdamOptimizer(learning_rate=0.001, parameter_list=mnist.parameters())
if args.use_data_parallel:
mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)
train_reader = paddle.batch(
reader_decorator(
paddle.dataset.mnist.train()),
batch_size=BATCH_SIZE,
drop_last=True)
if args.use_data_parallel:
train_reader = fluid.contrib.reader.distributed_batch_reader(
train_reader)
test_reader = paddle.batch(
reader_decorator(
paddle.dataset.mnist.test()),
batch_size=BATCH_SIZE,
drop_last=True)
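# Same readers as train_mp.py, but the DataLoader below keeps the default
# single-process mode (no use_multiprocess flag), which appears to be the point
# of the _sp/_mp comparison in this test.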
train_loader = fluid.io.DataLoader.from_generator(capacity=10)
train_loader.set_sample_list_generator(train_reader, places=place)
test_loader = fluid.io.DataLoader.from_generator(capacity=10)
test_loader.set_sample_list_generator(test_reader, places=place)
total_train_time = 0
for epoch in range(epoch_num):
stime = time.time()
for batch_id, data in enumerate(train_loader()):
img, label = data
label.stop_gradient = True
cost, acc = mnist(img, label)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
if args.use_data_parallel:
avg_loss = mnist.scale_loss(avg_loss)
avg_loss.backward()
mnist.apply_collective_grads()
else:
avg_loss.backward()
adam.minimize(avg_loss)
# clear gradients after the optimizer step
mnist.clear_gradients()
if batch_id % 100 == 0:
print("Loss at epoch {} step {}: {:}".format(
epoch, batch_id, avg_loss.numpy()))
total_train_time += (time.time() - stime)
mnist.eval()
test_cost, test_acc = test_mnist(test_loader, mnist, BATCH_SIZE)
mnist.train()
if args.ce:
print("kpis\ttest_acc\t%s" % test_acc)
print("kpis\ttest_cost\t%s" % test_cost)
print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format(
epoch, test_cost, test_acc))
save_parameters = (not args.use_data_parallel) or (
args.use_data_parallel and
fluid.dygraph.parallel.Env().local_rank == 0)
if save_parameters:
fluid.save_dygraph(mnist.state_dict(), "save_temp")
print("checkpoint saved")
inference_mnist()
print("total train time: {} s".format(total_train_time))
if __name__ == '__main__':
args = parse_args()
train_mnist(args)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import sys
import logging
import paddle.compat as cpt
TEST_LOG_DIR = './dataloader_test_log'
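# Usage sketch (file name assumed, not given in the commit): after the shell
# script above finishes, running `python parse_result.py` walks
# ./dataloader_test_log and prints the "total train time" line found in each log.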
def parse_test_result(dirname):
if not os.path.exists(dirname):
logging.warning("log dir is not exist.")
return
for parent, _, filenames in os.walk(dirname):
for filename in filenames:
filepath = os.path.join(parent, filename)
if not os.path.isfile(filepath):
continue
with open(filepath, 'rb') as f:
for line in f.readlines():
if line.startswith(b'total train time:'):
print("%s - %s" % (filepath, cpt.to_text(line)[:-1]))
if __name__ == "__main__":
parse_test_result(TEST_LOG_DIR)
\ No newline at end of file
......@@ -25,6 +25,7 @@ from paddle.fluid import framework
import math
import sys
import time
IMAGENET1000 = 1281167
base_lr = 0.1
......@@ -41,7 +42,7 @@ def parse_args():
help="The flag indicating whether to use data parallel mode to train the model."
)
parser.add_argument(
"-e", "--epoch", default=120, type=int, help="set epoch")
"-e", "--epoch", default=1, type=int, help="set epoch")
parser.add_argument(
"-b", "--batch_size", default=32, type=int, help="set epoch")
parser.add_argument("--ce", action="store_true", help="run ce")
......@@ -310,6 +311,7 @@ def train_resnet():
#file_name = './model/epoch_0.npz'
#model_data = np.load( file_name )
total_train_time = 0
for eop in range(epoch):
resnet.train()
......@@ -324,6 +326,7 @@ def train_resnet():
print("load finished")
stime = time.time()
for batch_id, data in enumerate(train_reader()):
dy_x_data = np.array(
[x[0].reshape(3, 224, 224) for x in data]).astype('float32')
......@@ -365,6 +368,7 @@ def train_resnet():
print( "epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f" % \
( eop, batch_id, total_loss / total_sample, \
total_acc1 / total_sample, total_acc5 / total_sample))
total_train_time += (time.time() - stime)
if args.ce:
print("kpis\ttrain_acc1\t%0.3f" % (total_acc1 / total_sample))
......@@ -383,7 +387,8 @@ def train_resnet():
fluid.save_dygraph(resnet.state_dict(),
'resnet_params')
print("total train time: {} s".format(total_train_time))
if __name__ == '__main__':
train_resnet()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import argparse
import ast
import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid import framework
import math
import sys
import time
IMAGENET1000 = 1281167
base_lr = 0.1
momentum_rate = 0.9
l2_decay = 1e-4
def parse_args():
parser = argparse.ArgumentParser("Training for Resnet.")
parser.add_argument(
"--use_data_parallel",
type=ast.literal_eval,
default=False,
help="The flag indicating whether to use data parallel mode to train the model."
)
parser.add_argument(
"-e", "--epoch", default=1, type=int, help="set epoch")
parser.add_argument(
"-b", "--batch_size", default=32, type=int, help="set epoch")
parser.add_argument("--ce", action="store_true", help="run ce")
args = parser.parse_args()
return args
args = parse_args()
batch_size = args.batch_size
def optimizer_setting(parameter_list=None):
total_images = IMAGENET1000
step = int(math.ceil(float(total_images) / batch_size))
epochs = [30, 60, 90]
bd = [step * e for e in epochs]
lr = []
lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
if fluid.in_dygraph_mode():
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=momentum_rate,
regularization=fluid.regularizer.L2Decay(l2_decay),
parameter_list=parameter_list)
else:
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=momentum_rate,
regularization=fluid.regularizer.L2Decay(l2_decay))
return optimizer
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
bias_attr=False)
self._batch_norm = BatchNorm(num_filters, act=act)
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class BottleneckBlock(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
shortcut=True):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act='relu')
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu')
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 4,
filter_size=1,
act=None)
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
stride=stride)
self.shortcut = shortcut
self._num_channels_out = num_filters * 4
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = fluid.layers.elementwise_add(x=short, y=conv2)
layer_helper = LayerHelper(self.full_name(), act='relu')
return layer_helper.append_activation(y)
class ResNet(fluid.dygraph.Layer):
def __init__(self, layers=50, class_dim=102):
super(ResNet, self).__init__()
self.layers = layers
supported_layers = [50, 101, 152]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(supported_layers, layers)
if layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_channels = [64, 256, 512, 1024]
num_filters = [64, 128, 256, 512]
self.conv = ConvBNLayer(
num_channels=3,
num_filters=64,
filter_size=7,
stride=2,
act='relu')
self.pool2d_max = Pool2D(
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
self.bottleneck_block_list = []
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BottleneckBlock(
num_channels=num_channels[block]
if i == 0 else num_filters[block] * 4,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut))
self.bottleneck_block_list.append(bottleneck_block)
shortcut = True
self.pool2d_avg = Pool2D(
pool_size=7, pool_type='avg', global_pooling=True)
self.pool2d_avg_output = num_filters[len(num_filters) - 1] * 4 * 1 * 1
import math
stdv = 1.0 / math.sqrt(2048 * 1.0)
self.out = Linear(self.pool2d_avg_output,
class_dim,
act='softmax',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv)))
def forward(self, inputs):
y = self.conv(inputs)
y = self.pool2d_max(y)
for bottleneck_block in self.bottleneck_block_list:
y = bottleneck_block(y)
y = self.pool2d_avg(y)
y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_output])
y = self.out(y)
return y
def reader_decorator(reader):
def __reader__():
for item in reader():
img = np.array(item[0]).astype('float32').reshape(3, 224, 224)
label = np.array(item[1]).astype('int64').reshape(1)
yield img, label
return __reader__
def eval(model, data):
model.eval()
total_loss = 0.0
total_acc1 = 0.0
total_acc5 = 0.0
total_sample = 0
for batch_id, data in enumerate(data()):
img = data[0]
label = data[1]
label.stop_gradient = True
out = model(img)
#loss = fluid.layers.cross_entropy(input=out, label=label)
#avg_loss = fluid.layers.mean(x=loss)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
#dy_out = avg_loss.numpy()
#total_loss += dy_out
total_acc1 += acc_top1.numpy()
total_acc5 += acc_top5.numpy()
total_sample += 1
# print("epoch id: %d, batch step: %d, loss: %f" % (eop, batch_id, dy_out))
if batch_id % 10 == 0:
print("test | batch step %d, acc1 %0.3f acc5 %0.3f" % \
( batch_id, total_acc1 / total_sample, total_acc5 / total_sample))
if args.ce:
print("kpis\ttest_acc1\t%0.3f" % (total_acc1 / total_sample))
print("kpis\ttest_acc5\t%0.3f" % (total_acc5 / total_sample))
print("final eval acc1 %0.3f acc5 %0.3f" % \
(total_acc1 / total_sample, total_acc5 / total_sample))
def train_resnet():
epoch = args.epoch
place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
if args.use_data_parallel else fluid.CUDAPlace(0)
with fluid.dygraph.guard(place):
if args.ce:
print("ce mode")
seed = 33
np.random.seed(seed)
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
if args.use_data_parallel:
strategy = fluid.dygraph.parallel.prepare_context()
resnet = ResNet()
optimizer = optimizer_setting(parameter_list=resnet.parameters())
if args.use_data_parallel:
resnet = fluid.dygraph.parallel.DataParallel(resnet, strategy)
train_reader = paddle.batch(
reader_decorator(
paddle.dataset.flowers.train(use_xmap=False)),
batch_size=batch_size,
drop_last=True)
if args.use_data_parallel:
train_reader = fluid.contrib.reader.distributed_batch_reader(
train_reader)
test_reader = paddle.batch(
reader_decorator(
paddle.dataset.flowers.test(use_xmap=False)),
batch_size=batch_size,
drop_last=True)
train_loader = fluid.io.DataLoader.from_generator(capacity=10, use_multiprocess=True)
train_loader.set_sample_list_generator(train_reader, places=place)
test_loader = fluid.io.DataLoader.from_generator(capacity=10, use_multiprocess=True)
test_loader.set_sample_list_generator(test_reader, places=place)
#file_name = './model/epoch_0.npz'
#model_data = np.load( file_name )
total_train_time = 0
for eop in range(epoch):
resnet.train()
total_loss = 0.0
total_acc1 = 0.0
total_acc5 = 0.0
total_sample = 0
#dict_state = resnet.state_dict()
#resnet.load_dict( model_data )
print("load finished")
stime = time.time()
for batch_id, data in enumerate(train_loader()):
img = data[0]
label = data[1]
label.stop_gradient = True
out = resnet(img)
loss = fluid.layers.cross_entropy(input=out, label=label)
avg_loss = fluid.layers.mean(x=loss)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
dy_out = avg_loss.numpy()
if args.use_data_parallel:
avg_loss = resnet.scale_loss(avg_loss)
avg_loss.backward()
resnet.apply_collective_grads()
else:
avg_loss.backward()
optimizer.minimize(avg_loss)
resnet.clear_gradients()
total_loss += dy_out
total_acc1 += acc_top1.numpy()
total_acc5 += acc_top5.numpy()
total_sample += 1
#print("epoch id: %d, batch step: %d, loss: %f" % (eop, batch_id, dy_out))
if batch_id % 10 == 0:
print( "epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f" % \
( eop, batch_id, total_loss / total_sample, \
total_acc1 / total_sample, total_acc5 / total_sample))
total_train_time += (time.time() - stime)
if args.ce:
print("kpis\ttrain_acc1\t%0.3f" % (total_acc1 / total_sample))
print("kpis\ttrain_acc5\t%0.3f" % (total_acc5 / total_sample))
print("kpis\ttrain_loss\t%0.3f" % (total_loss / total_sample))
print("epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f" % \
(eop, batch_id, total_loss / total_sample, \
total_acc1 / total_sample, total_acc5 / total_sample))
resnet.eval()
eval(resnet, test_loader)
save_parameters = (not args.use_data_parallel) or (
args.use_data_parallel and
fluid.dygraph.parallel.Env().local_rank == 0)
if save_parameters:
fluid.save_dygraph(resnet.state_dict(),
'resnet_params')
print("total train time: {} s".format(total_train_time))
if __name__ == '__main__':
train_resnet()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import argparse
import ast
import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid import framework
import math
import sys
import time
IMAGENET1000 = 1281167
base_lr = 0.1
momentum_rate = 0.9
l2_decay = 1e-4
def parse_args():
parser = argparse.ArgumentParser("Training for Resnet.")
parser.add_argument(
"--use_data_parallel",
type=ast.literal_eval,
default=False,
help="The flag indicating whether to use data parallel mode to train the model."
)
parser.add_argument(
"-e", "--epoch", default=1, type=int, help="set epoch")
parser.add_argument(
"-b", "--batch_size", default=32, type=int, help="set epoch")
parser.add_argument("--ce", action="store_true", help="run ce")
args = parser.parse_args()
return args
args = parse_args()
batch_size = args.batch_size
def optimizer_setting(parameter_list=None):
total_images = IMAGENET1000
step = int(math.ceil(float(total_images) / batch_size))
epochs = [30, 60, 90]
bd = [step * e for e in epochs]
lr = []
lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
if fluid.in_dygraph_mode():
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=momentum_rate,
regularization=fluid.regularizer.L2Decay(l2_decay),
parameter_list=parameter_list)
else:
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=momentum_rate,
regularization=fluid.regularizer.L2Decay(l2_decay))
return optimizer
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
bias_attr=False)
self._batch_norm = BatchNorm(num_filters, act=act)
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class BottleneckBlock(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
shortcut=True):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act='relu')
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu')
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 4,
filter_size=1,
act=None)
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
stride=stride)
self.shortcut = shortcut
self._num_channels_out = num_filters * 4
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = fluid.layers.elementwise_add(x=short, y=conv2)
layer_helper = LayerHelper(self.full_name(), act='relu')
return layer_helper.append_activation(y)
class ResNet(fluid.dygraph.Layer):
def __init__(self, layers=50, class_dim=102):
super(ResNet, self).__init__()
self.layers = layers
supported_layers = [50, 101, 152]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(supported_layers, layers)
if layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_channels = [64, 256, 512, 1024]
num_filters = [64, 128, 256, 512]
self.conv = ConvBNLayer(
num_channels=3,
num_filters=64,
filter_size=7,
stride=2,
act='relu')
self.pool2d_max = Pool2D(
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
self.bottleneck_block_list = []
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BottleneckBlock(
num_channels=num_channels[block]
if i == 0 else num_filters[block] * 4,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut))
self.bottleneck_block_list.append(bottleneck_block)
shortcut = True
self.pool2d_avg = Pool2D(
pool_size=7, pool_type='avg', global_pooling=True)
self.pool2d_avg_output = num_filters[len(num_filters) - 1] * 4 * 1 * 1
import math
stdv = 1.0 / math.sqrt(2048 * 1.0)
self.out = Linear(self.pool2d_avg_output,
class_dim,
act='softmax',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv)))
def forward(self, inputs):
y = self.conv(inputs)
y = self.pool2d_max(y)
for bottleneck_block in self.bottleneck_block_list:
y = bottleneck_block(y)
y = self.pool2d_avg(y)
y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_output])
y = self.out(y)
return y
def reader_decorator(reader):
def __reader__():
for item in reader():
img = np.array(item[0]).astype('float32').reshape(3, 224, 224)
label = np.array(item[1]).astype('int64').reshape(1)
yield img, label
return __reader__
def eval(model, data):
model.eval()
total_loss = 0.0
total_acc1 = 0.0
total_acc5 = 0.0
total_sample = 0
for batch_id, data in enumerate(data()):
img = data[0]
label = data[1]
label.stop_gradient = True
out = model(img)
#loss = fluid.layers.cross_entropy(input=out, label=label)
#avg_loss = fluid.layers.mean(x=loss)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
#dy_out = avg_loss.numpy()
#total_loss += dy_out
total_acc1 += acc_top1.numpy()
total_acc5 += acc_top5.numpy()
total_sample += 1
# print("epoch id: %d, batch step: %d, loss: %f" % (eop, batch_id, dy_out))
if batch_id % 10 == 0:
print("test | batch step %d, acc1 %0.3f acc5 %0.3f" % \
( batch_id, total_acc1 / total_sample, total_acc5 / total_sample))
if args.ce:
print("kpis\ttest_acc1\t%0.3f" % (total_acc1 / total_sample))
print("kpis\ttest_acc5\t%0.3f" % (total_acc5 / total_sample))
print("final eval acc1 %0.3f acc5 %0.3f" % \
(total_acc1 / total_sample, total_acc5 / total_sample))
def train_resnet():
epoch = args.epoch
place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
if args.use_data_parallel else fluid.CUDAPlace(0)
with fluid.dygraph.guard(place):
if args.ce:
print("ce mode")
seed = 33
np.random.seed(seed)
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
if args.use_data_parallel:
strategy = fluid.dygraph.parallel.prepare_context()
resnet = ResNet()
optimizer = optimizer_setting(parameter_list=resnet.parameters())
if args.use_data_parallel:
resnet = fluid.dygraph.parallel.DataParallel(resnet, strategy)
train_reader = paddle.batch(
reader_decorator(
paddle.dataset.flowers.train(use_xmap=False)),
batch_size=batch_size,
drop_last=True)
if args.use_data_parallel:
train_reader = fluid.contrib.reader.distributed_batch_reader(
train_reader)
test_reader = paddle.batch(
reader_decorator(
paddle.dataset.flowers.test(use_xmap=False)),
batch_size=batch_size,
drop_last=True)
train_loader = fluid.io.DataLoader.from_generator(capacity=10)
train_loader.set_sample_list_generator(train_reader, places=place)
test_loader = fluid.io.DataLoader.from_generator(capacity=10)
test_loader.set_sample_list_generator(test_reader, places=place)
#file_name = './model/epoch_0.npz'
#model_data = np.load( file_name )
total_train_time = 0
for eop in range(epoch):
resnet.train()
total_loss = 0.0
total_acc1 = 0.0
total_acc5 = 0.0
total_sample = 0
#dict_state = resnet.state_dict()
#resnet.load_dict( model_data )
print("load finished")
stime = time.time()
for batch_id, data in enumerate(train_loader()):
img = data[0]
label = data[1]
label.stop_gradient = True
out = resnet(img)
loss = fluid.layers.cross_entropy(input=out, label=label)
avg_loss = fluid.layers.mean(x=loss)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
dy_out = avg_loss.numpy()
if args.use_data_parallel:
avg_loss = resnet.scale_loss(avg_loss)
avg_loss.backward()
resnet.apply_collective_grads()
else:
avg_loss.backward()
optimizer.minimize(avg_loss)
resnet.clear_gradients()
total_loss += dy_out
total_acc1 += acc_top1.numpy()
total_acc5 += acc_top5.numpy()
total_sample += 1
#print("epoch id: %d, batch step: %d, loss: %f" % (eop, batch_id, dy_out))
if batch_id % 10 == 0:
print( "epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f" % \
( eop, batch_id, total_loss / total_sample, \
total_acc1 / total_sample, total_acc5 / total_sample))
total_train_time += (time.time() - stime)
if args.ce:
print("kpis\ttrain_acc1\t%0.3f" % (total_acc1 / total_sample))
print("kpis\ttrain_acc5\t%0.3f" % (total_acc5 / total_sample))
print("kpis\ttrain_loss\t%0.3f" % (total_loss / total_sample))
print("epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f" % \
(eop, batch_id, total_loss / total_sample, \
total_acc1 / total_sample, total_acc5 / total_sample))
resnet.eval()
eval(resnet, test_loader)
save_parameters = (not args.use_data_parallel) or (
args.use_data_parallel and
fluid.dygraph.parallel.Env().local_rank == 0)
if save_parameters:
fluid.save_dygraph(resnet.state_dict(),
'resnet_params')
print("total train time: {} s".format(total_train_time))
if __name__ == '__main__':
train_resnet()
......@@ -16,6 +16,7 @@ import contextlib
import unittest
import numpy as np
import six
import time
import paddle
import paddle.fluid as fluid
......@@ -29,7 +30,7 @@ import argparse
import ast
parser = argparse.ArgumentParser("Training for Se-ResNeXt.")
parser.add_argument("-e", "--epoch", default=200, type=int, help="set epoch")
parser.add_argument("-e", "--epoch", default=1, type=int, help="set epoch")
parser.add_argument("--ce", action="store_true", help="run ce")
parser.add_argument(
"--use_data_parallel",
......@@ -52,7 +53,7 @@ train_parameters = {
"batch_size": batch_size,
"lr": 0.0125,
"total_images": 6149,
"num_epochs": 200
"num_epochs": 1
}
momentum_rate = 0.9
......@@ -99,8 +100,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
-bias_attr=False,
-param_attr=fluid.ParamAttr(name="weights"))
+bias_attr=False)
self._batch_norm = BatchNorm(num_filters, act=act)
......@@ -399,11 +399,13 @@ def train():
test_reader = paddle.batch(
paddle.dataset.flowers.test(use_xmap=False), batch_size=32)
total_train_time = 0
for epoch_id in range(epoch_num):
total_loss = 0.0
total_acc1 = 0.0
total_acc5 = 0.0
total_sample = 0
stime = time.time()
for batch_id, data in enumerate(train_reader()):
dy_x_data = np.array([x[0].reshape(3, 224, 224)
......@@ -446,6 +448,7 @@ def train():
print( "epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f lr %0.5f" % \
( epoch_id, batch_id, total_loss / total_sample, \
total_acc1 / total_sample, total_acc5 / total_sample, lr))
total_train_time += (time.time() - stime)
if args.ce:
print("kpis\ttrain_acc1\t%0.3f" % (total_acc1 / total_sample))
......@@ -458,6 +461,8 @@ def train():
eval(se_resnext, test_reader)
se_resnext.train()
print("total train time: {} s".format(total_train_time))
if __name__ == '__main__':
train()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import unittest
import numpy as np
import six
import time
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable
import sys
import math
import argparse
import ast
parser = argparse.ArgumentParser("Training for Se-ResNeXt.")
parser.add_argument("-e", "--epoch", default=1, type=int, help="set epoch")
parser.add_argument("--ce", action="store_true", help="run ce")
parser.add_argument(
"--use_data_parallel",
type=ast.literal_eval,
default=False,
help="The flag indicating whether to use data parallel mode to train the model."
)
args = parser.parse_args()
batch_size = 64
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "cosine_decay",
"batch_size": batch_size,
"epochs": [40, 80, 100],
"steps": [0.1, 0.01, 0.001, 0.0001]
},
"batch_size": batch_size,
"lr": 0.0125,
"total_images": 6149,
"num_epochs": 1
}
momentum_rate = 0.9
l2_decay = 1.2e-4
def optimizer_setting(params, parameter_list):
ls = params["learning_strategy"]
if "total_images" not in params:
total_images = 6149
else:
total_images = params["total_images"]
batch_size = ls["batch_size"]
step = int(math.ceil(float(total_images) / batch_size))
bd = [step * e for e in ls["epochs"]]
lr = params["lr"]
num_epochs = params["num_epochs"]
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.cosine_decay(
learning_rate=lr, step_each_epoch=step, epochs=num_epochs),
momentum=momentum_rate,
regularization=fluid.regularizer.L2Decay(l2_decay),
parameter_list=parameter_list)
return optimizer
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
bias_attr=False)
self._batch_norm = BatchNorm(num_filters, act=act)
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class SqueezeExcitation(fluid.dygraph.Layer):
def __init__(self, num_channels, reduction_ratio):
super(SqueezeExcitation, self).__init__()
self._num_channels = num_channels
self._pool = Pool2D(pool_size=0, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(num_channels * 1.0)
self._fc = Linear(
num_channels,
num_channels // reduction_ratio,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv)),
act='relu')
stdv = 1.0 / math.sqrt(num_channels / 16.0 * 1.0)
self._excitation = Linear(
num_channels // reduction_ratio,
num_channels,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv)),
act='sigmoid')
def forward(self, input):
y = self._pool(input)
y = fluid.layers.reshape(y, shape=[-1, self._num_channels])
y = self._fc(y)
y = self._excitation(y)
y = fluid.layers.elementwise_mul(x=input, y=y, axis=0)
return y
class BottleneckBlock(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
cardinality,
reduction_ratio,
shortcut=True):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act="relu")
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=stride,
groups=cardinality,
act="relu")
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 2,
filter_size=1,
act=None)
self.scale = SqueezeExcitation(
num_channels=num_filters * 2,
reduction_ratio=reduction_ratio)
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 2,
filter_size=1,
stride=stride)
self.shortcut = shortcut
self._num_channels_out = num_filters * 2
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
scale = self.scale(conv2)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = fluid.layers.elementwise_add(x=short, y=scale, act='relu')
return y
class SeResNeXt(fluid.dygraph.Layer):
def __init__(self, layers=50, class_dim=102):
super(SeResNeXt, self).__init__()
self.layers = layers
supported_layers = [50, 101, 152]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(supported_layers, layers)
if layers == 50:
cardinality = 32
reduction_ratio = 16
depth = [3, 4, 6, 3]
num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer(
num_channels=3,
num_filters=64,
filter_size=7,
stride=2,
act='relu')
self.pool = Pool2D(
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
elif layers == 101:
cardinality = 32
reduction_ratio = 16
depth = [3, 4, 23, 3]
num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer(
num_channels=3,
num_filters=64,
filter_size=7,
stride=2,
act='relu')
self.pool = Pool2D(
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
elif layers == 152:
cardinality = 64
reduction_ratio = 16
depth = [3, 8, 36, 3]
num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer(
num_channels=3,
num_filters=64,
filter_size=3,
stride=2,
act='relu')
self.conv1 = ConvBNLayer(
num_channels=64,
num_filters=64,
filter_size=3,
stride=1,
act='relu')
self.conv2 = ConvBNLayer(
num_channels=64,
num_filters=128,
filter_size=3,
stride=1,
act='relu')
self.pool = Pool2D(
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
self.bottleneck_block_list = []
num_channels = 64
if layers == 152:
num_channels = 128
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BottleneckBlock(
num_channels=num_channels,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
cardinality=cardinality,
reduction_ratio=reduction_ratio,
shortcut=shortcut))
num_channels = bottleneck_block._num_channels_out
self.bottleneck_block_list.append(bottleneck_block)
shortcut = True
self.pool2d_avg = Pool2D(
pool_size=7, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(2048 * 1.0)
self.pool2d_avg_output = num_filters[len(num_filters) - 1] * 2 * 1 * 1
self.out = Linear(self.pool2d_avg_output,
class_dim,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv)))
def forward(self, inputs):
if self.layers == 50 or self.layers == 101:
y = self.conv0(inputs)
y = self.pool(y)
elif self.layers == 152:
y = self.conv0(inputs)
y = self.conv1(y)
y = self.conv2(y)
y = self.pool(y)
for bottleneck_block in self.bottleneck_block_list:
y = bottleneck_block(y)
y = self.pool2d_avg(y)
y = fluid.layers.dropout(y, dropout_prob=0.5, seed=100)
y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_output])
y = self.out(y)
return y
def reader_decorator(reader):
def __reader__():
for item in reader():
img = np.array(item[0]).astype('float32').reshape(3, 224, 224)
label = np.array(item[1]).astype('int64').reshape(1)
yield img, label
return __reader__
def eval(model, data):
model.eval()
batch_size = 32
total_loss = 0.0
total_acc1 = 0.0
total_acc5 = 0.0
total_sample = 0
for batch_id, data in enumerate(data()):
img, label = data
label.stop_gradient = True
out = model(img)
softmax_out = fluid.layers.softmax(out, use_cudnn=False)
loss = fluid.layers.cross_entropy(input=softmax_out, label=label)
avg_loss = fluid.layers.mean(x=loss)
acc_top1 = fluid.layers.accuracy(input=softmax_out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=softmax_out, label=label, k=5)
dy_out = avg_loss.numpy()
total_loss += dy_out
total_acc1 += acc_top1.numpy()
total_acc5 += acc_top5.numpy()
total_sample += 1
if batch_id % 10 == 0:
print("test | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f" % \
( batch_id, total_loss / total_sample, \
total_acc1 / total_sample, total_acc5 / total_sample))
if args.ce:
print("kpis\ttest_acc1\t%0.3f" % (total_acc1 / total_sample))
print("kpis\ttest_acc5\t%0.3f" % (total_acc5 / total_sample))
print("kpis\ttest_loss\t%0.3f" % (total_loss / total_sample))
print("final eval loss %0.3f acc1 %0.3f acc5 %0.3f" % \
(total_loss / total_sample, \
total_acc1 / total_sample, total_acc5 / total_sample))
def train():
epoch_num = train_parameters["num_epochs"]
if args.ce:
epoch_num = args.epoch
batch_size = train_parameters["batch_size"]
trainer_count = fluid.dygraph.parallel.Env().nranks
place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
if args.use_data_parallel else fluid.CUDAPlace(0)
with fluid.dygraph.guard(place):
if args.ce:
print("ce mode")
seed = 90
np.random.seed(seed)
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
if args.use_data_parallel:
strategy = fluid.dygraph.parallel.prepare_context()
se_resnext = SeResNeXt()
optimizer = optimizer_setting(train_parameters, se_resnext.parameters())
if args.use_data_parallel:
se_resnext = fluid.dygraph.parallel.DataParallel(se_resnext,
strategy)
train_reader = paddle.batch(
reader_decorator(
paddle.dataset.flowers.train(use_xmap=False)),
batch_size=batch_size,
drop_last=True)
if args.use_data_parallel:
train_reader = fluid.contrib.reader.distributed_batch_reader(
train_reader)
test_reader = paddle.batch(
reader_decorator(
paddle.dataset.flowers.test(use_xmap=False)),
batch_size=32)
train_loader = fluid.io.DataLoader.from_generator(capacity=10, use_multiprocess=True)
train_loader.set_sample_list_generator(train_reader, places=place)
test_loader = fluid.io.DataLoader.from_generator(capacity=10, use_multiprocess=True)
test_loader.set_sample_list_generator(test_reader, places=place)
total_train_time = 0
for epoch_id in range(epoch_num):
total_loss = 0.0
total_acc1 = 0.0
total_acc5 = 0.0
total_sample = 0
stime = time.time()
for batch_id, data in enumerate(train_loader()):
img, label = data
label.stop_gradient = True
out = se_resnext(img)
softmax_out = fluid.layers.softmax(out, use_cudnn=False)
loss = fluid.layers.cross_entropy(
input=softmax_out, label=label)
avg_loss = fluid.layers.mean(x=loss)
acc_top1 = fluid.layers.accuracy(
input=softmax_out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(
input=softmax_out, label=label, k=5)
dy_out = avg_loss.numpy()
if args.use_data_parallel:
avg_loss = se_resnext.scale_loss(avg_loss)
avg_loss.backward()
se_resnext.apply_collective_grads()
else:
avg_loss.backward()
optimizer.minimize(avg_loss)
se_resnext.clear_gradients()
lr = optimizer._global_learning_rate().numpy()
total_loss += dy_out
total_acc1 += acc_top1.numpy()
total_acc5 += acc_top5.numpy()
total_sample += 1
if batch_id % 10 == 0:
print( "epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f lr %0.5f" % \
( epoch_id, batch_id, total_loss / total_sample, \
total_acc1 / total_sample, total_acc5 / total_sample, lr))
total_train_time += (time.time() - stime)
if args.ce:
print("kpis\ttrain_acc1\t%0.3f" % (total_acc1 / total_sample))
print("kpis\ttrain_acc5\t%0.3f" % (total_acc5 / total_sample))
print("kpis\ttrain_loss\t%0.3f" % (total_loss / total_sample))
print("epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f" % \
(epoch_id, batch_id, total_loss / total_sample, \
total_acc1 / total_sample, total_acc5 / total_sample))
se_resnext.eval()
eval(se_resnext, test_loader)
se_resnext.train()
print("total train time: {} s".format(total_train_time))
if __name__ == '__main__':
train()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import unittest
import numpy as np
import six
import time
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable
import sys
import math
import argparse
import ast
parser = argparse.ArgumentParser("Training for Se-ResNeXt.")
parser.add_argument("-e", "--epoch", default=1, type=int, help="set epoch")
parser.add_argument("--ce", action="store_true", help="run ce")
parser.add_argument(
"--use_data_parallel",
type=ast.literal_eval,
default=False,
help="The flag indicating whether to use data parallel mode to train the model."
)
args = parser.parse_args()
batch_size = 64
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "cosine_decay",
"batch_size": batch_size,
"epochs": [40, 80, 100],
"steps": [0.1, 0.01, 0.001, 0.0001]
},
"batch_size": batch_size,
"lr": 0.0125,
"total_images": 6149,
"num_epochs": 1
}
momentum_rate = 0.9
l2_decay = 1.2e-4
def optimizer_setting(params, parameter_list):
ls = params["learning_strategy"]
if "total_images" not in params:
total_images = 6149
else:
total_images = params["total_images"]
batch_size = ls["batch_size"]
step = int(math.ceil(float(total_images) / batch_size))
bd = [step * e for e in ls["epochs"]]
lr = params["lr"]
num_epochs = params["num_epochs"]
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.cosine_decay(
learning_rate=lr, step_each_epoch=step, epochs=num_epochs),
momentum=momentum_rate,
regularization=fluid.regularizer.L2Decay(l2_decay),
parameter_list=parameter_list)
return optimizer
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
bias_attr=False)
self._batch_norm = BatchNorm(num_filters, act=act)
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
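# Squeeze-and-Excitation block: global average pooling, an FC layer that reduces the
# channel dimension, an FC layer that restores it, and a sigmoid gate that rescales
# the input feature map channel-wise.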
class SqueezeExcitation(fluid.dygraph.Layer):
def __init__(self, num_channels, reduction_ratio):
super(SqueezeExcitation, self).__init__()
self._num_channels = num_channels
self._pool = Pool2D(pool_size=0, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(num_channels * 1.0)
self._fc = Linear(
num_channels,
num_channels // reduction_ratio,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv)),
act='relu')
stdv = 1.0 / math.sqrt(num_channels / 16.0 * 1.0)
self._excitation = Linear(
num_channels // reduction_ratio,
num_channels,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv)),
act='sigmoid')
def forward(self, input):
y = self._pool(input)
y = fluid.layers.reshape(y, shape=[-1, self._num_channels])
y = self._fc(y)
y = self._excitation(y)
y = fluid.layers.elementwise_mul(x=input, y=y, axis=0)
return y
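# SE-ResNeXt bottleneck: 1x1 conv -> grouped 3x3 conv -> 1x1 conv -> SE rescaling,
# added to the (optionally projected) shortcut with a ReLU on the sum.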
class BottleneckBlock(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
cardinality,
reduction_ratio,
shortcut=True):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act="relu")
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=stride,
groups=cardinality,
act="relu")
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 2,
filter_size=1,
act=None)
self.scale = SqueezeExcitation(
num_channels=num_filters * 2,
reduction_ratio=reduction_ratio)
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 2,
filter_size=1,
stride=stride)
self.shortcut = shortcut
self._num_channels_out = num_filters * 2
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
scale = self.scale(conv2)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = fluid.layers.elementwise_add(x=short, y=scale, act='relu')
return y
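# SE-ResNeXt backbone supporting the 50-, 101- and 152-layer configurations; ends with
# global average pooling, dropout and a linear classifier (102 classes by default).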
class SeResNeXt(fluid.dygraph.Layer):
def __init__(self, layers=50, class_dim=102):
super(SeResNeXt, self).__init__()
self.layers = layers
supported_layers = [50, 101, 152]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(supported_layers, layers)
if layers == 50:
cardinality = 32
reduction_ratio = 16
depth = [3, 4, 6, 3]
num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer(
num_channels=3,
num_filters=64,
filter_size=7,
stride=2,
act='relu')
self.pool = Pool2D(
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
elif layers == 101:
cardinality = 32
reduction_ratio = 16
depth = [3, 4, 23, 3]
num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer(
num_channels=3,
num_filters=64,
filter_size=7,
stride=2,
act='relu')
self.pool = Pool2D(
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
elif layers == 152:
cardinality = 64
reduction_ratio = 16
depth = [3, 8, 36, 3]
num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer(
num_channels=3,
num_filters=64,
filter_size=3,
stride=2,
act='relu')
self.conv1 = ConvBNLayer(
num_channels=64,
num_filters=64,
filter_size=3,
stride=1,
act='relu')
self.conv2 = ConvBNLayer(
num_channels=64,
num_filters=128,
filter_size=3,
stride=1,
act='relu')
self.pool = Pool2D(
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
self.bottleneck_block_list = []
num_channels = 64
if layers == 152:
num_channels = 128
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BottleneckBlock(
num_channels=num_channels,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
cardinality=cardinality,
reduction_ratio=reduction_ratio,
shortcut=shortcut))
num_channels = bottleneck_block._num_channels_out
self.bottleneck_block_list.append(bottleneck_block)
shortcut = True
self.pool2d_avg = Pool2D(
pool_size=7, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(2048 * 1.0)
self.pool2d_avg_output = num_filters[len(num_filters) - 1] * 2 * 1 * 1
self.out = Linear(self.pool2d_avg_output,
class_dim,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv)))
def forward(self, inputs):
if self.layers == 50 or self.layers == 101:
y = self.conv0(inputs)
y = self.pool(y)
elif self.layers == 152:
y = self.conv0(inputs)
y = self.conv1(y)
y = self.conv2(y)
y = self.pool(y)
for bottleneck_block in self.bottleneck_block_list:
y = bottleneck_block(y)
y = self.pool2d_avg(y)
y = fluid.layers.dropout(y, dropout_prob=0.5, seed=100)
y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_output])
y = self.out(y)
return y
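# Convert each flowers sample into a (3, 224, 224) float32 image and an int64 label so
# the batched reader can feed DataLoader.set_sample_list_generator.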
def reader_decorator(reader):
def __reader__():
for item in reader():
img = np.array(item[0]).astype('float32').reshape(3, 224, 224)
label = np.array(item[1]).astype('int64').reshape(1)
yield img, label
return __reader__
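# Evaluation loop (the name shadows the Python builtin eval); reports running-average
# loss and top-1 / top-5 accuracy over the test DataLoader.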
def eval(model, data_loader):
model.eval()
batch_size = 32
total_loss = 0.0
total_acc1 = 0.0
total_acc5 = 0.0
total_sample = 0
for batch_id, data in enumerate(data_loader()):
img, label = data
label.stop_gradient = True
out = model(img)
softmax_out = fluid.layers.softmax(out, use_cudnn=False)
loss = fluid.layers.cross_entropy(input=softmax_out, label=label)
avg_loss = fluid.layers.mean(x=loss)
acc_top1 = fluid.layers.accuracy(input=softmax_out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=softmax_out, label=label, k=5)
dy_out = avg_loss.numpy()
total_loss += dy_out
total_acc1 += acc_top1.numpy()
total_acc5 += acc_top5.numpy()
total_sample += 1
if batch_id % 10 == 0:
print("test | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f" % \
(batch_id, total_loss / total_sample, \
total_acc1 / total_sample, total_acc5 / total_sample))
if args.ce:
print("kpis\ttest_acc1\t%0.3f" % (total_acc1 / total_sample))
print("kpis\ttest_acc5\t%0.3f" % (total_acc5 / total_sample))
print("kpis\ttest_loss\t%0.3f" % (total_loss / total_sample))
print("final eval loss %0.3f acc1 %0.3f acc5 %0.3f" % \
(total_loss / total_sample, \
total_acc1 / total_sample, total_acc5 / total_sample))
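# Training entry point: builds the model, optimizer and DataLoaders, optionally wraps
# the model with DataParallel, and times the training loop across epochs.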
def train():
epoch_num = train_parameters["num_epochs"]
if args.ce:
epoch_num = args.epoch
batch_size = train_parameters["batch_size"]
trainer_count = fluid.dygraph.parallel.Env().nranks
place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
if args.use_data_parallel else fluid.CUDAPlace(0)
with fluid.dygraph.guard(place):
if args.ce:
print("ce mode")
seed = 90
np.random.seed(seed)
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
if args.use_data_parallel:
strategy = fluid.dygraph.parallel.prepare_context()
se_resnext = SeResNeXt()
optimizer = optimizer_setting(train_parameters, se_resnext.parameters())
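# In multi-card mode the model is wrapped with DataParallel; losses are scaled and
# gradients all-reduced via scale_loss / apply_collective_grads in the training loop.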
if args.use_data_parallel:
se_resnext = fluid.dygraph.parallel.DataParallel(se_resnext,
strategy)
train_reader = paddle.batch(
reader_decorator(
paddle.dataset.flowers.train(use_xmap=False)),
batch_size=batch_size,
drop_last=True)
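# Shard the batched reader across trainers so each card consumes different batches.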
if args.use_data_parallel:
train_reader = fluid.contrib.reader.distributed_batch_reader(
train_reader)
test_reader = paddle.batch(
reader_decorator(
paddle.dataset.flowers.test(use_xmap=False)),
batch_size=32)
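# Build dygraph DataLoaders from the batched readers; capacity is the size of the
# prefetch queue and places pins the produced tensors to the chosen device.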
train_loader = fluid.io.DataLoader.from_generator(capacity=10)
train_loader.set_sample_list_generator(train_reader, places=place)
test_loader = fluid.io.DataLoader.from_generator(capacity=10)
test_loader.set_sample_list_generator(test_reader, places=place)
total_train_time = 0
for epoch_id in range(epoch_num):
total_loss = 0.0
total_acc1 = 0.0
total_acc5 = 0.0
total_sample = 0
stime = time.time()
for batch_id, data in enumerate(train_loader()):
img, label = data
label.stop_gradient = True
out = se_resnext(img)
softmax_out = fluid.layers.softmax(out, use_cudnn=False)
loss = fluid.layers.cross_entropy(
input=softmax_out, label=label)
avg_loss = fluid.layers.mean(x=loss)
acc_top1 = fluid.layers.accuracy(
input=softmax_out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(
input=softmax_out, label=label, k=5)
dy_out = avg_loss.numpy()
if args.use_data_parallel:
avg_loss = se_resnext.scale_loss(avg_loss)
avg_loss.backward()
se_resnext.apply_collective_grads()
else:
avg_loss.backward()
optimizer.minimize(avg_loss)
se_resnext.clear_gradients()
lr = optimizer._global_learning_rate().numpy()
total_loss += dy_out
total_acc1 += acc_top1.numpy()
total_acc5 += acc_top5.numpy()
total_sample += 1
if batch_id % 10 == 0:
print( "epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f lr %0.5f" % \
( epoch_id, batch_id, total_loss / total_sample, \
total_acc1 / total_sample, total_acc5 / total_sample, lr))
total_train_time += (time.time() - stime)
if args.ce:
print("kpis\ttrain_acc1\t%0.3f" % (total_acc1 / total_sample))
print("kpis\ttrain_acc5\t%0.3f" % (total_acc5 / total_sample))
print("kpis\ttrain_loss\t%0.3f" % (total_loss / total_sample))
print("epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f" % \
(epoch_id, batch_id, total_loss / total_sample, \
total_acc1 / total_sample, total_acc5 / total_sample))
se_resnext.eval()
eval(se_resnext, test_loader)
se_resnext.train()
print("total train time: {} s".format(total_train_time))
if __name__ == '__main__':
train()
......@@ -15,6 +15,7 @@
from __future__ import print_function
import argparse
import ast
import time
import numpy as np
import paddle
......@@ -144,10 +145,12 @@ def train(args):
batch_size=TrainTaskConfig.batch_size)
# loop for training iterations
total_train_time = 0
for i in range(TrainTaskConfig.pass_num):
dy_step = 0
sum_cost = 0
transformer.train()
stime = time.time()
for batch in train_reader():
enc_inputs, dec_inputs, label, weights = prepare_train_input(
batch, ModelHyperParams.eos_idx, ModelHyperParams.eos_idx,
......@@ -170,6 +173,7 @@ def train(args):
print("pass num : {}, batch_id: {}, dy_graph avg loss: {}".
format(i, dy_step,
dy_avg_cost.numpy() * trainer_count))
total_train_time += (time.time() - stime)
# switch to evaluation mode
transformer.eval()
......@@ -190,6 +194,8 @@ def train(args):
if fluid.dygraph.parallel.Env().dev_id == 0:
fluid.save_dygraph(transformer.state_dict(), args.model_file)
print("total train time: {} s".format(total_train_time))
if __name__ == '__main__':
args = parse_args()
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import argparse
import ast
import time
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.dataset.wmt16 as wmt16
from model import TransFormer, NoamDecay
from config import *
from data_util import *
def parse_args():
parser = argparse.ArgumentParser("Arguments for Training")
parser.add_argument(
"--use_data_parallel",
type=ast.literal_eval,
default=False,
help="The flag indicating whether to use multi-GPU.")
parser.add_argument(
"--model_file",
type=str,
default="transformer_params",
help="Save the model as a file named `model_file.pdparams`.")
parser.add_argument(
'opts',
help='See config.py for all options',
default=None,
nargs=argparse.REMAINDER)
args = parser.parse_args()
merge_cfg_from_list(args.opts, [TrainTaskConfig, ModelHyperParams])
return args
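# Pad a batch of (src, trg, label) instances and return the numpy arrays fed to the
# transformer: word ids, positions, attention-bias tensors, label ids and loss weights.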
def prepare_train_input_array(insts, src_pad_idx, trg_pad_idx, n_head):
"""
inputs for training
"""
src_word, src_pos, src_slf_attn_bias, src_max_len = pad_batch_data(
[inst[0] for inst in insts], src_pad_idx, n_head, is_target=False)
src_word = src_word.reshape(-1, src_max_len)
src_pos = src_pos.reshape(-1, src_max_len)
trg_word, trg_pos, trg_slf_attn_bias, trg_max_len = pad_batch_data(
[inst[1] for inst in insts], trg_pad_idx, n_head, is_target=True)
trg_word = trg_word.reshape(-1, trg_max_len)
trg_pos = trg_pos.reshape(-1, trg_max_len)
trg_src_attn_bias = np.tile(src_slf_attn_bias[:, :, ::src_max_len, :],
[1, 1, trg_max_len, 1]).astype("float32")
lbl_word, lbl_weight, num_token = pad_batch_data(
[inst[2] for inst in insts],
trg_pad_idx,
n_head,
is_target=False,
is_label=True,
return_attn_bias=False,
return_max_len=False,
return_num_token=True)
return src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos, \
trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight
def input_data_array_reader(reader, src_pad_idx, trg_pad_idx, n_head):
def __reader__():
r = reader()
for data in r:
src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos, \
trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight = \
prepare_train_input_array(data, src_pad_idx, trg_pad_idx, n_head)
yield src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos, \
trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight
return __reader__
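# Split the flat list of input arrays back into encoder inputs, decoder inputs, label
# and loss weights according to the configured input field layouts.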
def group_inputs(var_inputs):
enc_inputs = var_inputs[0:len(encoder_data_input_fields)]
dec_inputs = var_inputs[len(encoder_data_input_fields
):len(encoder_data_input_fields) +
len(decoder_data_input_fields[:-1])]
label = var_inputs[-2]
weights = var_inputs[-1]
return enc_inputs, dec_inputs, label, weights
def train(args):
"""
train models
:return:
"""
trainer_count = fluid.dygraph.parallel.Env().nranks
place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
if args.use_data_parallel else fluid.CUDAPlace(0)
with fluid.dygraph.guard(place):
if args.use_data_parallel:
strategy = fluid.dygraph.parallel.prepare_context()
# define model
transformer = TransFormer(
ModelHyperParams.src_vocab_size,
ModelHyperParams.trg_vocab_size, ModelHyperParams.max_length + 1,
ModelHyperParams.n_layer, ModelHyperParams.n_head,
ModelHyperParams.d_key, ModelHyperParams.d_value,
ModelHyperParams.d_model, ModelHyperParams.d_inner_hid,
ModelHyperParams.prepostprocess_dropout,
ModelHyperParams.attention_dropout, ModelHyperParams.relu_dropout,
ModelHyperParams.preprocess_cmd, ModelHyperParams.postprocess_cmd,
ModelHyperParams.weight_sharing, TrainTaskConfig.label_smooth_eps)
# define optimizer
optimizer = fluid.optimizer.Adam(learning_rate=NoamDecay(
ModelHyperParams.d_model, TrainTaskConfig.warmup_steps,
TrainTaskConfig.learning_rate),
parameter_list=transformer.parameters(),
beta1=TrainTaskConfig.beta1,
beta2=TrainTaskConfig.beta2,
epsilon=TrainTaskConfig.eps)
#
if args.use_data_parallel:
transformer = fluid.dygraph.parallel.DataParallel(
transformer, strategy)
# define data generator for training and validation
train_reader = input_data_array_reader(
paddle.batch(
wmt16.train(
ModelHyperParams.src_vocab_size,
ModelHyperParams.trg_vocab_size),
batch_size=TrainTaskConfig.batch_size),
ModelHyperParams.eos_idx,
ModelHyperParams.eos_idx,
ModelHyperParams.n_head)
if args.use_data_parallel:
train_reader = fluid.contrib.reader.distributed_batch_reader(
train_reader)
val_reader = input_data_array_reader(
paddle.batch(
wmt16.test(
ModelHyperParams.src_vocab_size,
ModelHyperParams.trg_vocab_size),
batch_size=TrainTaskConfig.batch_size),
ModelHyperParams.eos_idx,
ModelHyperParams.eos_idx,
ModelHyperParams.n_head)
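# Multi-process DataLoader variant: use_multiprocess=True loads and prepares batches in
# a separate worker process so data preparation overlaps with training.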
train_loader = fluid.io.DataLoader.from_generator(capacity=200, use_multiprocess=True)
train_loader.set_batch_generator(train_reader, places=place)
val_loader = fluid.io.DataLoader.from_generator(capacity=200, use_multiprocess=True)
val_loader.set_batch_generator(val_reader, places=place)
# loop for training iterations
total_train_time = 0
for i in range(TrainTaskConfig.pass_num):
dy_step = 0
sum_cost = 0
transformer.train()
stime = time.time()
for batch in train_loader():
src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos, \
trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight = batch
enc_inputs, dec_inputs, label, weights = \
group_inputs([src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos,
trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight])
dy_sum_cost, dy_avg_cost, dy_predict, dy_token_num = transformer(
enc_inputs, dec_inputs, label, weights)
if args.use_data_parallel:
dy_avg_cost = transformer.scale_loss(dy_avg_cost)
dy_avg_cost.backward()
transformer.apply_collective_grads()
else:
dy_avg_cost.backward()
optimizer.minimize(dy_avg_cost)
transformer.clear_gradients()
dy_step = dy_step + 1
if dy_step % 10 == 0:
print("pass num : {}, batch_id: {}, dy_graph avg loss: {}".
format(i, dy_step,
dy_avg_cost.numpy() * trainer_count))
total_train_time += (time.time() - stime)
# switch to evaluation mode
transformer.eval()
sum_cost = 0
token_num = 0
for batch in val_loader():
src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos, \
trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight = batch
enc_inputs, dec_inputs, label, weights = \
group_inputs([src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos,
trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight])
dy_sum_cost, dy_avg_cost, dy_predict, dy_token_num = transformer(
enc_inputs, dec_inputs, label, weights)
sum_cost += dy_sum_cost.numpy()
token_num += dy_token_num.numpy()
print("pass : {} finished, validation avg loss: {}".format(
i, sum_cost / token_num))
if fluid.dygraph.parallel.Env().dev_id == 0:
fluid.save_dygraph(transformer.state_dict(), args.model_file)
print("total train time: {} s".format(total_train_time))
if __name__ == '__main__':
args = parse_args()
train(args)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import argparse
import ast
import time
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.dataset.wmt16 as wmt16
from model import TransFormer, NoamDecay
from config import *
from data_util import *
def parse_args():
parser = argparse.ArgumentParser("Arguments for Training")
parser.add_argument(
"--use_data_parallel",
type=ast.literal_eval,
default=False,
help="The flag indicating whether to use multi-GPU.")
parser.add_argument(
"--model_file",
type=str,
default="transformer_params",
help="Save the model as a file named `model_file.pdparams`.")
parser.add_argument(
'opts',
help='See config.py for all options',
default=None,
nargs=argparse.REMAINDER)
args = parser.parse_args()
merge_cfg_from_list(args.opts, [TrainTaskConfig, ModelHyperParams])
return args
def prepare_train_input_array(insts, src_pad_idx, trg_pad_idx, n_head):
"""
inputs for training
"""
src_word, src_pos, src_slf_attn_bias, src_max_len = pad_batch_data(
[inst[0] for inst in insts], src_pad_idx, n_head, is_target=False)
src_word = src_word.reshape(-1, src_max_len)
src_pos = src_pos.reshape(-1, src_max_len)
trg_word, trg_pos, trg_slf_attn_bias, trg_max_len = pad_batch_data(
[inst[1] for inst in insts], trg_pad_idx, n_head, is_target=True)
trg_word = trg_word.reshape(-1, trg_max_len)
trg_pos = trg_pos.reshape(-1, trg_max_len)
trg_src_attn_bias = np.tile(src_slf_attn_bias[:, :, ::src_max_len, :],
[1, 1, trg_max_len, 1]).astype("float32")
lbl_word, lbl_weight, num_token = pad_batch_data(
[inst[2] for inst in insts],
trg_pad_idx,
n_head,
is_target=False,
is_label=True,
return_attn_bias=False,
return_max_len=False,
return_num_token=True)
return src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos, \
trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight
def input_data_array_reader(reader, src_pad_idx, trg_pad_idx, n_head):
def __reader__():
r = reader()
for data in r:
src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos, \
trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight = \
prepare_train_input_array(data, src_pad_idx, trg_pad_idx, n_head)
yield src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos, \
trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight
return __reader__
def group_inputs(var_inputs):
enc_inputs = var_inputs[0:len(encoder_data_input_fields)]
dec_inputs = var_inputs[len(encoder_data_input_fields
):len(encoder_data_input_fields) +
len(decoder_data_input_fields[:-1])]
label = var_inputs[-2]
weights = var_inputs[-1]
return enc_inputs, dec_inputs, label, weights
def train(args):
"""
train models
:return:
"""
trainer_count = fluid.dygraph.parallel.Env().nranks
place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
if args.use_data_parallel else fluid.CUDAPlace(0)
with fluid.dygraph.guard(place):
if args.use_data_parallel:
strategy = fluid.dygraph.parallel.prepare_context()
# define model
transformer = TransFormer(
ModelHyperParams.src_vocab_size,
ModelHyperParams.trg_vocab_size, ModelHyperParams.max_length + 1,
ModelHyperParams.n_layer, ModelHyperParams.n_head,
ModelHyperParams.d_key, ModelHyperParams.d_value,
ModelHyperParams.d_model, ModelHyperParams.d_inner_hid,
ModelHyperParams.prepostprocess_dropout,
ModelHyperParams.attention_dropout, ModelHyperParams.relu_dropout,
ModelHyperParams.preprocess_cmd, ModelHyperParams.postprocess_cmd,
ModelHyperParams.weight_sharing, TrainTaskConfig.label_smooth_eps)
# define optimizer
optimizer = fluid.optimizer.Adam(learning_rate=NoamDecay(
ModelHyperParams.d_model, TrainTaskConfig.warmup_steps,
TrainTaskConfig.learning_rate),
parameter_list=transformer.parameters(),
beta1=TrainTaskConfig.beta1,
beta2=TrainTaskConfig.beta2,
epsilon=TrainTaskConfig.eps)
#
if args.use_data_parallel:
transformer = fluid.dygraph.parallel.DataParallel(
transformer, strategy)
# define data generator for training and validation
train_reader = input_data_array_reader(
paddle.batch(
wmt16.train(
ModelHyperParams.src_vocab_size,
ModelHyperParams.trg_vocab_size),
batch_size=TrainTaskConfig.batch_size),
ModelHyperParams.eos_idx,
ModelHyperParams.eos_idx,
ModelHyperParams.n_head)
if args.use_data_parallel:
train_reader = fluid.contrib.reader.distributed_batch_reader(
train_reader)
val_reader = input_data_array_reader(
paddle.batch(
wmt16.test(
ModelHyperParams.src_vocab_size,
ModelHyperParams.trg_vocab_size),
batch_size=TrainTaskConfig.batch_size),
ModelHyperParams.eos_idx,
ModelHyperParams.eos_idx,
ModelHyperParams.n_head)
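# Single-process DataLoader variant: batches are prepared in the training process itself
# (no use_multiprocess).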
train_loader = fluid.io.DataLoader.from_generator(capacity=200)
train_loader.set_batch_generator(train_reader, places=place)
val_loader = fluid.io.DataLoader.from_generator(capacity=200)
val_loader.set_batch_generator(val_reader, places=place)
# loop for training iterations
total_train_time = 0
for i in range(TrainTaskConfig.pass_num):
dy_step = 0
sum_cost = 0
transformer.train()
stime = time.time()
for batch in train_loader():
src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos, \
trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight = batch
enc_inputs, dec_inputs, label, weights = \
group_inputs([src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos,
trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight])
dy_sum_cost, dy_avg_cost, dy_predict, dy_token_num = transformer(
enc_inputs, dec_inputs, label, weights)
if args.use_data_parallel:
dy_avg_cost = transformer.scale_loss(dy_avg_cost)
dy_avg_cost.backward()
transformer.apply_collective_grads()
else:
dy_avg_cost.backward()
optimizer.minimize(dy_avg_cost)
transformer.clear_gradients()
dy_step = dy_step + 1
if dy_step % 10 == 0:
print("pass num : {}, batch_id: {}, dy_graph avg loss: {}".
format(i, dy_step,
dy_avg_cost.numpy() * trainer_count))
total_train_time += (time.time() - stime)
# switch to evaluation mode
transformer.eval()
sum_cost = 0
token_num = 0
for batch in val_loader():
src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos, \
trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight = batch
enc_inputs, dec_inputs, label, weights = \
group_inputs([src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos,
trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight])
dy_sum_cost, dy_avg_cost, dy_predict, dy_token_num = transformer(
enc_inputs, dec_inputs, label, weights)
sum_cost += dy_sum_cost.numpy()
token_num += dy_token_num.numpy()
print("pass : {} finished, validation avg loss: {}".format(
i, sum_cost / token_num))
if fluid.dygraph.parallel.Env().dev_id == 0:
fluid.save_dygraph(transformer.state_dict(), args.model_file)
print("total train time: {} s".format(total_train_time))
if __name__ == '__main__':
args = parse_args()
train(args)