diff --git a/of_e2e.sh b/of_e2e.sh index 376b6722341bfeb8c9f4f4a3ee416a315156ec15..187323e7caf0589949f85dc917ab8eed1148eaec 100755 --- a/of_e2e.sh +++ b/of_e2e.sh @@ -1,7 +1,8 @@ rm -rf core.* rm -rf ./output/snapshots/* -#DATA_ROOT=/mnt/13_nfs/xuan/ImageNet/ofrecord -DATA_ROOT=/dataset/ImageNet/ofrecord +#DATA_ROOT=/DATA/disk1/of_imagenet_example +DATA_ROOT=/DATA/disk1/ImageNet/ofrecord +#DATA_ROOT=/dataset/ImageNet/ofrecord #DATA_ROOT=/dataset/imagenet-mxnet #python3 cnn_benchmark/of_cnn_train_val.py \ #nvprof -f -o resnet.nvvp \ @@ -13,13 +14,19 @@ DATA_ROOT=/dataset/ImageNet/ofrecord --val_data_part_num=256 \ --num_nodes=1 \ --node_ips='11.11.1.12,11.11.1.14' \ - --gpu_num_per_node=4 \ + --gpu_num_per_node=8 \ --optimizer="momentum-cosine-decay" \ --learning_rate=0.256 \ --loss_print_every_n_iter=20 \ - --batch_size_per_device=32 \ + --batch_size_per_device=64 \ --val_batch_size_per_device=125 \ + --use_boxing_v2=True \ + --use_new_dataloader=True \ --model="resnet50" + # --train_data_dir=$DATA_ROOT/train \ + # --train_data_part_num=256 \ + # --val_data_dir=$DATA_ROOT/validation \ + # --val_data_part_num=256 \ #--use_fp16 true \ #--weight_l2=3.0517578125e-05 \ #--num_examples=1024 \