提交 dcf40fd0 编写于 作者: L Luo Tao

refine benchmark

上级 f7a60017
...@@ -7,3 +7,6 @@ paddle/rnn/imdb.pkl ...@@ -7,3 +7,6 @@ paddle/rnn/imdb.pkl
caffe/image/logs caffe/image/logs
tensorflow/image/logs tensorflow/image/logs
tensorflow/rnn/logs tensorflow/rnn/logs
fluid/models/*.pyc
fluid/logs
fluid/nohup.out
...@@ -40,10 +40,7 @@ def parse_args(): ...@@ -40,10 +40,7 @@ def parse_args():
parser.add_argument( parser.add_argument(
'--batch_size', type=int, default=32, help='The minibatch size.') '--batch_size', type=int, default=32, help='The minibatch size.')
parser.add_argument( parser.add_argument(
'--learning_rate', '--learning_rate', type=float, default=0.001, help='The learning rate.')
type=float,
default=0.001,
help='The minibatch size.')
# TODO(wuyi): add "--use_fake_data" option back. # TODO(wuyi): add "--use_fake_data" option back.
parser.add_argument( parser.add_argument(
'--skip_batch_num', '--skip_batch_num',
...@@ -72,6 +69,11 @@ def parse_args(): ...@@ -72,6 +69,11 @@ def parse_args():
type=int, type=int,
default=1, default=1,
help='If gpus > 1, will use ParallelExecutor to run, else use Executor.') help='If gpus > 1, will use ParallelExecutor to run, else use Executor.')
parser.add_argument(
'--cpus',
type=int,
default=1,
help='If cpus > 1, will use ParallelDo to run, else use Executor.')
parser.add_argument( parser.add_argument(
'--data_set', '--data_set',
type=str, type=str,
...@@ -231,10 +233,7 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc, ...@@ -231,10 +233,7 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
train_losses.append(loss) train_losses.append(loss)
print("Pass: %d, Iter: %d, Loss: %f\n" % print("Pass: %d, Iter: %d, Loss: %f\n" %
(pass_id, iters, np.mean(train_losses))) (pass_id, iters, np.mean(train_losses)))
train_elapsed = time.time() - start_time print_train_time(start_time, time.time(), num_samples)
examples_per_sec = num_samples / train_elapsed
print('\nTotal examples: %d, total time: %.5f, %.5f examples/sec\n' %
(num_samples, train_elapsed, examples_per_sec))
print("Pass: %d, Loss: %f" % (pass_id, np.mean(train_losses))) print("Pass: %d, Loss: %f" % (pass_id, np.mean(train_losses)))
# evaluation # evaluation
if not args.no_test and batch_acc != None: if not args.no_test and batch_acc != None:
...@@ -315,10 +314,7 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader, ...@@ -315,10 +314,7 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
if batch_id % 1 == 0: if batch_id % 1 == 0:
print("Pass %d, batch %d, loss %s" % print("Pass %d, batch %d, loss %s" %
(pass_id, batch_id, np.array(loss))) (pass_id, batch_id, np.array(loss)))
train_elapsed = time.time() - start_time print_train_time(start_time, time.time(), num_samples)
examples_per_sec = num_samples / train_elapsed
print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
(num_samples, train_elapsed, examples_per_sec))
if not args.no_test and batch_acc != None: if not args.no_test and batch_acc != None:
test_acc = test(startup_exe, infer_prog, test_reader, feeder, test_acc = test(startup_exe, infer_prog, test_reader, feeder,
batch_acc) batch_acc)
...@@ -329,12 +325,19 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader, ...@@ -329,12 +325,19 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
def print_arguments(args): def print_arguments(args):
vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and
vars(args)['device'] == 'GPU') vars(args)['device'] == 'GPU')
print('----------- resnet Configuration Arguments -----------') print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()): for arg, value in sorted(vars(args).iteritems()):
print('%s: %s' % (arg, value)) print('%s: %s' % (arg, value))
print('------------------------------------------------') print('------------------------------------------------')
def print_train_time(start_time, end_time, num_samples):
train_elapsed = end_time - start_time
examples_per_sec = num_samples / train_elapsed
print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
(num_samples, train_elapsed, examples_per_sec))
def main(): def main():
args = parse_args() args = parse_args()
print_arguments(args) print_arguments(args)
...@@ -342,7 +345,7 @@ def main(): ...@@ -342,7 +345,7 @@ def main():
# the unique trainer id, starting from 0, needed by trainer # the unique trainer id, starting from 0, needed by trainer
# only # only
nccl_id_var, num_trainers, trainer_id = ( nccl_id_var, num_trainers, trainer_id = (
None, 1, int(os.getenv("PADDLE_TRAINER_ID", "-1"))) None, 1, int(os.getenv("PADDLE_TRAINER_ID", "0")))
if args.use_cprof: if args.use_cprof:
pr = cProfile.Profile() pr = cProfile.Profile()
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
# This script benchmarking the PaddlePaddle Fluid on # This script benchmarking the PaddlePaddle Fluid on
# single thread single GPU. # single thread single GPU.
mkdir -p logs
#export FLAGS_fraction_of_gpu_memory_to_use=0.0 #export FLAGS_fraction_of_gpu_memory_to_use=0.0
export CUDNN_PATH=/paddle/cudnn_v5 export CUDNN_PATH=/paddle/cudnn_v5
...@@ -35,6 +36,7 @@ nohup stdbuf -oL nvidia-smi \ ...@@ -35,6 +36,7 @@ nohup stdbuf -oL nvidia-smi \
--format=csv \ --format=csv \
--filename=mem.log \ --filename=mem.log \
-l 1 & -l 1 &
# mnist # mnist
# mnist gpu mnist 128 # mnist gpu mnist 128
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
...@@ -43,7 +45,7 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \ ...@@ -43,7 +45,7 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--batch_size=128 \ --batch_size=128 \
--skip_batch_num=5 \ --skip_batch_num=5 \
--iterations=500 \ --iterations=500 \
2>&1 | tee -a mnist_gpu_128.log 2>&1 | tee -a logs/mnist_gpu_128.log
# vgg16 # vgg16
# gpu cifar10 128 # gpu cifar10 128
...@@ -53,7 +55,7 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \ ...@@ -53,7 +55,7 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--batch_size=128 \ --batch_size=128 \
--skip_batch_num=5 \ --skip_batch_num=5 \
--iterations=30 \ --iterations=30 \
2>&1 | tee -a vgg16_gpu_128.log 2>&1 | tee -a logs/vgg16_gpu_128.log
# flowers gpu 128 # flowers gpu 128
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
...@@ -63,28 +65,28 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \ ...@@ -63,28 +65,28 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--data_set=flowers \ --data_set=flowers \
--skip_batch_num=5 \ --skip_batch_num=5 \
--iterations=30 \ --iterations=30 \
2>&1 | tee -a vgg16_gpu_flowers_32.log 2>&1 | tee -a logs/vgg16_gpu_flowers_32.log
# resnet50 # resnet50
# resnet50 gpu cifar10 128 # resnet50 gpu cifar10 128
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--model=resnet50 \ --model=resnet \
--device=GPU \ --device=GPU \
--batch_size=128 \ --batch_size=128 \
--data_set=cifar10 \ --data_set=cifar10 \
--skip_batch_num=5 \ --skip_batch_num=5 \
--iterations=30 \ --iterations=30 \
2>&1 | tee -a resnet50_gpu_128.log 2>&1 | tee -a logs/resnet50_gpu_128.log
# resnet50 gpu flowers 64 # resnet50 gpu flowers 64
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--model=resnet50 \ --model=resnet \
--device=GPU \ --device=GPU \
--batch_size=64 \ --batch_size=64 \
--data_set=flowers \ --data_set=flowers \
--skip_batch_num=5 \ --skip_batch_num=5 \
--iterations=30 \ --iterations=30 \
2>&1 | tee -a resnet50_gpu_flowers_64.log 2>&1 | tee -a logs/resnet50_gpu_flowers_64.log
# lstm # lstm
# lstm gpu imdb 32 # tensorflow only support batch=32 # lstm gpu imdb 32 # tensorflow only support batch=32
...@@ -94,7 +96,7 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \ ...@@ -94,7 +96,7 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--batch_size=32 \ --batch_size=32 \
--skip_batch_num=5 \ --skip_batch_num=5 \
--iterations=30 \ --iterations=30 \
2>&1 | tee -a lstm_gpu_32.log 2>&1 | tee -a logs/lstm_gpu_32.log
# seq2seq # seq2seq
# seq2seq gpu wmb 128 # seq2seq gpu wmb 128
...@@ -104,4 +106,4 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \ ...@@ -104,4 +106,4 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--batch_size=128 \ --batch_size=128 \
--skip_batch_num=5 \ --skip_batch_num=5 \
--iterations=30 \ --iterations=30 \
2>&1 | tee -a lstm_gpu_128.log 2>&1 | tee -a logs/lstm_gpu_128.log
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册