diff --git a/cnn_e2e/config.py b/cnn_e2e/config.py
index 76b0b8633a277184782fbe3ed9c04029400a514d..f444b340fdd658cf9a54bd4cff23586ac1ae8df6 100644
--- a/cnn_e2e/config.py
+++ b/cnn_e2e/config.py
@@ -6,7 +6,7 @@ import argparse
 from datetime import datetime
 import logging
 
-from dali_util import add_dali_args
+#from dali_util import add_dali_args
 from optimizer_util import add_optimizer_args
 from ofrecord_util import add_ofrecord_args
 
@@ -40,7 +40,7 @@ def get_parser(parser=None):
     parser.add_argument("--batch_size_per_device", type=int, default=64)
     parser.add_argument("--val_batch_size_per_device", type=int, default=8)
 
-    # for data process
+    # for data process
     parser.add_argument("--num_examples", type=int, default=1281167, help="train pic number")
     parser.add_argument("--num_val_examples", type=int, default=50000, help="validation pic number")
     parser.add_argument('--rgb-mean', type=float_list, default=[123.68, 116.779, 103.939],
@@ -48,6 +48,8 @@ def get_parser(parser=None):
     parser.add_argument('--rgb-std', type=float_list, default=[58.393, 57.12, 57.375],
                         help='a tuple of size 3 for the std rgb')
     parser.add_argument("--input_layout", type=str, default='NHWC', help="NCHW or NHWC")
+    parser.add_argument('--image-shape', type=int_list, default=[3, 224, 224],
+                        help='the image shape feed into the network')
 
     ## snapshot
 
@@ -65,7 +67,7 @@ def get_parser(parser=None):
         default=1,
         help="print loss every n iteration",
     )
-    add_dali_args(parser)
+    #add_dali_args(parser)
     add_ofrecord_args(parser)
     add_optimizer_args(parser)
     return parser
diff --git a/cnn_e2e/of_cnn_train_val.py b/cnn_e2e/of_cnn_train_val.py
index 6a7d9e25aec10d064ab3da9e6c2739f45499b08f..5e5cf21594d93a75eae69a2db65bd48add4f451f 100755
--- a/cnn_e2e/of_cnn_train_val.py
+++ b/cnn_e2e/of_cnn_train_val.py
@@ -44,7 +44,8 @@ def TrainNet():
     if args.train_data_dir:
         assert os.path.exists(args.train_data_dir)
         print("Loading data from {}".format(args.train_data_dir))
-        (labels, images) = ofrecord_util.load_imagenet_for_training(args)
+        (labels, images) = ofrecord_util.load_imagenet_for_training2(args)
+        # note: images.shape = (N C H W) in cc's new dataloader(load_imagenet_for_training2)
     else:
         print("Loading synthetic data.")
         (labels, images) = ofrecord_util.load_synthetic(args)
@@ -63,7 +64,7 @@ def InferenceNet():
     if args.val_data_dir:
         assert os.path.exists(args.val_data_dir)
         print("Loading data from {}".format(args.val_data_dir))
-        (labels, images) = ofrecord_util.load_imagenet_for_validation(args)
+        (labels, images) = ofrecord_util.load_imagenet_for_validation2(args)
     else:
         print("Loading synthetic data.")
         (labels, images) = ofrecord_util.load_synthetic(args)
diff --git a/cnn_e2e/ofrecord_util.py b/cnn_e2e/ofrecord_util.py
index 25875626f90992ef7c36c1eb54f16ec498cffb3c..8e599295d536aad39a1333d0c1ab4a4b80ef5684 100644
--- a/cnn_e2e/ofrecord_util.py
+++ b/cnn_e2e/ofrecord_util.py
@@ -59,9 +59,9 @@ def load_imagenet_for_validation(args):
     codec=flow.data.ImageCodec(
         [
             #flow.data.ImagePreprocessor('bgr2rgb'),
-            flow.data.ImageTargetResizePreprocessor(resize_shorter=256),
-            flow.data.ImageCenterCropPreprocessor(args.image_size, args.image_size),
-            #flow.data.ImageResizePreprocessor(args.image_size, args.image_size),
+            # flow.data.ImageTargetResizePreprocessor(resize_shorter=256),
+            # flow.data.ImageCenterCropPreprocessor(args.image_size, args.image_size),
+            flow.data.ImageResizePreprocessor(args.image_size, args.image_size),
         ]
     )
     return load_imagenet(args, val_batch_size, args.val_data_dir, args.val_data_part_num, codec)
@@ -98,15 +98,37 @@ def load_imagenet_for_training2(args):
         image = flow.data.OFRecordImageDecoderRandomCrop(ofrecord, "encoded", seed=seed,
                                                          color_space=color_space)
         label = flow.data.OFRecordRawDecoder(ofrecord, "class/label", shape=(), dtype=flow.int32)
-        rsz = flow.image.Resize(image, resize_x=float(args.image_size),
-                                resize_y=float(args.image_size),
+        rsz = flow.image.Resize(image, resize_x=args.image_size,
+                                resize_y=args.image_size,
                                 color_space=color_space)
 
-        rng = flow.image.CoinFlip(batch_size=train_batch_size, seed=seed)
+        rng = flow.random.CoinFlip(batch_size=train_batch_size, seed=seed)
         normal = flow.image.CropMirrorNormalize(rsz, mirror_blob=rng, color_space=color_space,
                                                 mean=args.rgb_mean, std=args.rgb_std, output_dtype = flow.float)
         return label, normal
 
+def load_imagenet_for_validation2(args):
+    total_device_num = args.num_nodes * args.gpu_num_per_node
+    val_batch_size = total_device_num * args.val_batch_size_per_device
+
+    seed = 0
+    color_space = 'RGB'
+    with flow.fixed_placement("cpu", "0:0"):
+        ofrecord = flow.data.ofrecord_loader(args.val_data_dir,
+                                             batch_size=val_batch_size,
+                                             data_part_num=args.val_data_part_num,
+                                             part_name_suffix_length=5)
+        image = flow.data.OFRecordImageDecoderRandomCrop(ofrecord, "encoded", seed=seed,
+                                                         color_space=color_space)
+        label = flow.data.OFRecordRawDecoder(ofrecord, "class/label", shape=(), dtype=flow.int32)
+        rsz = flow.image.Resize(image, resize_x=args.image_size,
+                                resize_y=args.image_size,
+                                color_space=color_space)
+
+        rng = flow.random.CoinFlip(batch_size=val_batch_size, seed=seed)
+        normal = flow.image.CropMirrorNormalize(rsz, mirror_blob=rng, color_space=color_space,
+                                                mean=args.rgb_mean, std=args.rgb_std, output_dtype = flow.float)
+        return label, normal
 
 if __name__ == "__main__":
     import os
diff --git a/cnn_e2e/resnet_model.py b/cnn_e2e/resnet_model.py
index 503c59d63717057b4fd76f0e4a19e09fd9ef5e4f..6f62dbc201ba470e33a85d76e291102f1f17d4c2 100755
--- a/cnn_e2e/resnet_model.py
+++ b/cnn_e2e/resnet_model.py
@@ -127,7 +127,9 @@ def resnet_stem(input):
 
 
 def resnet50(images, trainable=True):
-    images = flow.transpose(images, name="transpose", perm=[0, 3, 1, 2])
+    # note: images.shape = (N C H W) in cc's new dataloader, transpose is not needed anymore
+    # images = flow.transpose(images, name="transpose", perm=[0, 3, 1, 2])
+    print(images.shape, "******************************")
 
     with flow.deprecated.variable_scope("Resnet"):
         stem = resnet_stem(images)
diff --git a/of_e2e.sh b/of_e2e.sh
index cdc2eb8f816502f445952ff53d85bc2e7ca70ff4..376b6722341bfeb8c9f4f4a3ee416a315156ec15 100755
--- a/of_e2e.sh
+++ b/of_e2e.sh
@@ -4,14 +4,14 @@ rm -rf ./output/snapshots/*
 DATA_ROOT=/dataset/ImageNet/ofrecord
 #DATA_ROOT=/dataset/imagenet-mxnet
 #python3 cnn_benchmark/of_cnn_train_val.py \
-#gdb --args \
 #nvprof -f -o resnet.nvvp \
+#gdb --args \
 python3 cnn_e2e/of_cnn_train_val.py \
     --train_data_dir=$DATA_ROOT/train \
     --train_data_part_num=256 \
     --val_data_dir=$DATA_ROOT/validation \
     --val_data_part_num=256 \
-    --num_nodes=2 \
+    --num_nodes=1 \
    --node_ips='11.11.1.12,11.11.1.14' \
    --gpu_num_per_node=4 \
    --optimizer="momentum-cosine-decay" \
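
Note: the loaders introduced above emit image batches in NCHW layout, which is why resnet50 no longer applies flow.transpose(..., perm=[0, 3, 1, 2]). For reference only (not part of the patch), a minimal NumPy sketch of the layout conversion that the old NHWC loader required; the array names here are illustrative:

import numpy as np

# Fake NHWC batch, as the old loader produced: (N, H, W, C) = (8, 224, 224, 3).
nhwc = np.zeros((8, 224, 224, 3), dtype=np.float32)

# The permutation the removed flow.transpose performed (perm=[0, 3, 1, 2]).
nchw = np.transpose(nhwc, (0, 3, 1, 2))
print(nchw.shape)  # (8, 3, 224, 224) -- the layout the new loader already delivers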