diff --git a/PaddleCV/image_classification/scripts/train/ResNet101.sh b/PaddleCV/image_classification/scripts/train/ResNet101.sh index 80e929d4a6fe098a77c4be4f8f9ab97283b0d1b4..d728b0ac92479541c5f5f0c526a800d6be5ebf7c 100644 --- a/PaddleCV/image_classification/scripts/train/ResNet101.sh +++ b/PaddleCV/image_classification/scripts/train/ResNet101.sh @@ -6,9 +6,9 @@ export FLAGS_eager_delete_tensor_gb=0.0 export FLAGS_fraction_of_gpu_memory_to_use=0.98 #ResNet101: -python train.py \ +python3 train.py \ --model=ResNet101 \ - --batch_size=256 \ + --batch_size=1024 \ --model_save_dir=output/ \ --lr_strategy=piecewise_decay \ --num_epochs=120 \ diff --git a/PaddleCV/image_classification/train.py b/PaddleCV/image_classification/train.py index ac67d43d27f847ee2e72ad15885dd91f80cdc548..448b650f7eec8c72f4e7058c6b904803447e83d2 100755 --- a/PaddleCV/image_classification/train.py +++ b/PaddleCV/image_classification/train.py @@ -79,6 +79,8 @@ def build_program(is_train, main_prog, startup_prog, args): use_dynamic_loss_scaling=args.use_dynamic_loss_scaling) optimizer.minimize(avg_cost) + # print(main_prog) + # return if args.use_ema: global_steps = fluid.layers.learning_rate_scheduler._decay_step_counter( ) @@ -151,6 +153,14 @@ def validate(args, device_num=device_num) +def reader_decorator(reader): + def __reader__(): + for item in reader(): + img = np.array(item[0]).astype('float32').reshape(3, 224, 224) + label = np.array(item[1]).astype('int64').reshape(1) + yield img, label + return __reader__ + def train(args): """Train model @@ -206,6 +216,12 @@ def train(args): else: imagenet_reader = reader.ImageNetReader(0 if num_trainers > 1 else None) train_reader = imagenet_reader.train(settings=args) + train_reader = paddle.batch( + reader_decorator( + paddle.dataset.flowers.train(use_xmap=True)), + batch_size=args.batch_size, + drop_last=True) + if args.use_gpu: if num_trainers <= 1: places = fluid.framework.cuda_places() @@ -261,6 +277,7 @@ def train(args): sys.stdout.flush() 
train_batch_id += 1 t1 = time.time() + #NOTE: this is for the benchmark profiler total_batch_num = total_batch_num + 1 if args.is_profiler and pass_id == 0 and train_batch_id == args.print_step: @@ -290,6 +307,7 @@ if trainer_id == 0 and pass_id % args.save_step == 0: save_model(args, exe, train_prog, pass_id) + def main():