diff --git a/fluid/image_classification/.run_ce.sh b/fluid/image_classification/.run_ce.sh index be1a37615885f0e7f6700b0399e419ac90afaa33..eee6cc08f5eadbfbe48827712de33497ea89d429 100755 --- a/fluid/image_classification/.run_ce.sh +++ b/fluid/image_classification/.run_ce.sh @@ -1,10 +1,11 @@ #!/bin/bash # This file is only used for continuous evaluation. +export FLAGS_cudnn_deterministic=True cudaid=${object_detection_cudaid:=0} export CUDA_VISIBLE_DEVICES=$cudaid -python train.py --batch_size=64 --num_epochs=10 --total_images=6149 --enable_ce=True | python _ce.py +python train.py --batch_size=64 --num_epochs=5 --enable_ce=True | python _ce.py cudaid=${object_detection_cudaid_m:=0, 1, 2, 3} export CUDA_VISIBLE_DEVICES=$cudaid -python train.py --batch_size=64 --num_epochs=10 --total_images=6149 --enable_ce=True | python _ce.py +python train.py --batch_size=128 --num_epochs=5 --enable_ce=True | python _ce.py diff --git a/fluid/image_classification/_ce.py b/fluid/image_classification/_ce.py index 8b826b93628aa23631172ac7449f740fe3894484..363bcae789c29fd212dcffe22e72cf7aac7b665a 100644 --- a/fluid/image_classification/_ce.py +++ b/fluid/image_classification/_ce.py @@ -11,11 +11,11 @@ from kpi import CostKpi, DurationKpi, AccKpi train_acc_top1_kpi = AccKpi('train_acc_top1', 0.05, 0, desc='TOP1 ACC') train_acc_top5_kpi = AccKpi( - 'train_acc_top5', 0.05, 0, actived=True, desc='TOP5 ACC') + 'train_acc_top5', 0.05, 0, actived=False, desc='TOP5 ACC') train_cost_kpi = CostKpi('train_cost', 0.5, 0, actived=True, desc='train cost') test_acc_top1_kpi = AccKpi('test_acc_top1', 0.05, 0, desc='TOP1 ACC') test_acc_top5_kpi = AccKpi( - 'test_acc_top5', 0.05, 0, actived=True, desc='TOP5 ACC') + 'test_acc_top5', 0.05, 0, actived=False, desc='TOP5 ACC') test_cost_kpi = CostKpi('test_cost', 0.05, 0, actived=True, desc='train cost') train_speed_kpi = AccKpi( 'train_speed', @@ -27,13 +27,13 @@ train_speed_kpi = AccKpi( train_acc_top1_card4_kpi = AccKpi( 'train_acc_top1_card4', 0.05, 0, desc='TOP1 ACC') train_acc_top5_card4_kpi = AccKpi( - 'train_acc_top5_card4', 0.05, 0, actived=True, desc='TOP5 ACC') + 'train_acc_top5_card4', 0.05, 0, actived=False, desc='TOP5 ACC') train_cost_card4_kpi = CostKpi( 'train_cost_kpi', 0.05, 0, actived=True, desc='train cost') test_acc_top1_card4_kpi = AccKpi( 'test_acc_top1_card4', 0.05, 0, desc='TOP1 ACC') test_acc_top5_card4_kpi = AccKpi( - 'test_acc_top5_card4', 0.05, 0, actived=True, desc='TOP5 ACC') + 'test_acc_top5_card4', 0.05, 0, actived=False, desc='TOP5 ACC') test_cost_card4_kpi = CostKpi( 'test_cost_card4', 0.05, 0, actived=True, desc='train cost') train_speed_card4_kpi = AccKpi( diff --git a/fluid/image_classification/models/se_resnext.py b/fluid/image_classification/models/se_resnext.py index 3821381f290f1ea9457f6081a3c1176b3676d3e7..cc03b29a494f124faa3539d4d7ec8eb79434ed64 100644 --- a/fluid/image_classification/models/se_resnext.py +++ b/fluid/image_classification/models/se_resnext.py @@ -14,7 +14,7 @@ train_parameters = { "input_size": [3, 224, 224], "input_mean": [0.485, 0.456, 0.406], "input_std": [0.229, 0.224, 0.225], - "dropout_seed": None, + "enable_ce": False, "learning_strategy": { "name": "piecewise_decay", "batch_size": 256, @@ -105,9 +105,11 @@ class SE_ResNeXt(): pool = fluid.layers.pool2d( input=conv, pool_size=7, pool_type='avg', global_pooling=True) - # do not set seed when traning, it is only used for debug - drop = fluid.layers.dropout( - x=pool, dropout_prob=0.5, seed=self.params["dropout_seed"]) + # enable_ce is used for continuous evaluation to remove the randomness + if self.params["enable_ce"]: + drop = pool + else: + drop = fluid.layers.dropout(x=pool, dropout_prob=0.5) stdv = 1.0 / math.sqrt(drop.shape[1] * 1.0) out = fluid.layers.fc(input=drop, size=class_dim, diff --git a/fluid/image_classification/train.py b/fluid/image_classification/train.py index c72f626a0e8be898fdaa5320d72209366e68673e..041064c5ce5d6e640b83bd91055137139e3baf14 100644 --- a/fluid/image_classification/train.py +++ b/fluid/image_classification/train.py @@ -108,7 +108,7 @@ def train(args): if args.enable_ce: assert model_name == "SE_ResNeXt50_32x4d" fluid.default_startup_program().random_seed = 1000 - model.params["dropout_seed"] = 100 + model.params["enable_ce"] = True class_dim = 102 if model_name == "GoogleNet": @@ -258,7 +258,7 @@ def train(args): # This is for continuous evaluation only if args.enable_ce and pass_id == args.num_epochs - 1: if gpu_nums == 1: - # Use the last cost/acc for training + # Use the mean cost/acc for training print("kpis train_cost %s" % train_loss) print("kpis train_acc_top1 %s" % train_acc1) print("kpis train_acc_top5 %s" % train_acc5) @@ -268,21 +268,21 @@ def train(args): print("kpis test_acc_top5 %s" % test_acc5) print("kpis train_speed %s" % train_speed) else: - # Use the last cost/acc for training - print("kpis train_cost_card%s %s" % + # Use the mean cost/acc for training + print("kpis train_cost_card%s %s" % (gpu_nums, train_loss)) - print("kpis train_acc_top1_card%s %s" % + print("kpis train_acc_top1_card%s %s" % (gpu_nums, train_acc1)) - print("kpis train_acc_top5_card%s %s" % + print("kpis train_acc_top5_card%s %s" % (gpu_nums, train_acc5)) # Use the mean cost/acc for testing - print("kpis test_cost_card%s %s" % + print("kpis test_cost_card%s %s" % (gpu_nums, test_loss)) - print("kpis test_acc_top1_card%s %s" % + print("kpis test_acc_top1_card%s %s" % (gpu_nums, test_acc1)) - print("kpis test_acc_top5_card%s %s" % + print("kpis test_acc_top5_card%s %s" % (gpu_nums, test_acc5)) - print("kpis train_speed_card%s %s" % + print("kpis train_speed_card%s %s" % (gpu_nums, train_speed))