提交 d7ae4c1f 编写于 作者: S ShawnXuan

support io test

上级 1fe2f271
......@@ -109,3 +109,38 @@ def load_imagenet_for_training2(args):
mean=args.rgb_mean, std=args.rgb_std, output_dtype = flow.float)
print(normal.shape)
return label, normal
if __name__ == "__main__":
    # Stand-alone I/O test: define a job that only loads a batch of data and
    # returns its labels, then run that job repeatedly, handing every result
    # to the callback produced by `metric.metric_cb`.
    import os
    import config as configs
    from util import Summary, InitNodes, Metric
    from job_function_util import get_val_config

    # Number of times the I/O job is launched in one run of this script.
    IO_TEST_ITERATIONS = 1000

    parser = configs.get_parser()
    args = parser.parse_args()
    configs.print_args(args)

    flow.config.gpu_device_num(args.gpu_num_per_node)
    flow.config.enable_debug_mode(True)

    @flow.function(get_val_config(args))
    def IOTest():
        """Load one batch and return its labels as both predictions and labels.

        Returns:
            A dict with the keys "predictions" and "labels"; both refer to the
            loaded labels, since no model is evaluated in this test.

        Raises:
            FileNotFoundError: if ``args.train_data_dir`` is set but the path
                does not exist.
        """
        if args.train_data_dir:
            # Explicit check rather than `assert`: assertions are removed when
            # Python runs with -O, which would silently skip the validation.
            if not os.path.exists(args.train_data_dir):
                raise FileNotFoundError(
                    "train_data_dir does not exist: {}".format(args.train_data_dir)
                )
            print("Loading data from {}".format(args.train_data_dir))
            (labels, images) = load_imagenet_for_training(args)
            #(labels, images) = load_imagenet_for_training2(args)
        else:
            print("Loading synthetic data.")
            (labels, images) = load_synthetic(args)

        # The images are loaded but are not part of the returned outputs.
        predictions = labels
        outputs = {"predictions": predictions, "labels": labels}
        return outputs

    # Global batch size = nodes * GPUs per node * per-device batch size.
    total_device_num = args.num_nodes * args.gpu_num_per_node
    train_batch_size = total_device_num * args.batch_size_per_device

    summary = Summary(args.log_dir, args, filename='io_test.csv')
    metric = Metric(desc='io_test', calculate_batches=args.loss_print_every_n_iter,
                    summary=summary, save_summary_steps=args.loss_print_every_n_iter,
                    batch_size=train_batch_size)

    for i in range(IO_TEST_ITERATIONS):
        # Asynchronous launch; the callback is created with a fixed first
        # argument of 0 and the current iteration index.
        IOTest().async_get(metric.metric_cb(0, i))
# Remove anything matching core.* from the current directory (presumably core
# dumps left behind by earlier runs) before starting.
rm -rf core.*

# Dataset root. Other locations are kept, commented out, for quick switching.
#DATA_ROOT=/mnt/13_nfs/xuan/ImageNet/ofrecord
#DATA_ROOT=/dataset/imagenet-mxnet
DATA_ROOT=/dataset/ImageNet/ofrecord

# Alternative entry point and launch prefixes, currently disabled:
#python3 cnn_benchmark/of_cnn_train_val.py \
#gdb --args \
#nvprof -f -o resnet.nvvp \
python3 cnn_e2e/ofrecord_util.py \
    --train_data_dir="${DATA_ROOT}/train" \
    --train_data_part_num=256 \
    --val_data_dir="${DATA_ROOT}/validation" \
    --val_data_part_num=256 \
    --num_nodes=1 \
    --node_ips='11.11.1.13,11.11.1.14' \
    --gpu_num_per_node=4 \
    --optimizer="momentum-cosine-decay" \
    --learning_rate=0.256 \
    --loss_print_every_n_iter=20 \
    --batch_size_per_device=64 \
    --val_batch_size_per_device=125 \
    --model="resnet50"

# Extra options, currently switched off:
#--use_fp16 true \
#--weight_l2=3.0517578125e-05 \
#--num_examples=1024 \
#--optimizer="momentum-decay" \
#--data_dir="/mnt/13_nfs/xuan/ImageNet/ofrecord/train"
#--data_dir="/mnt/dataset/xuan/ImageNet/ofrecord/train"
#--warmup_iter_num=10000 \
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册