diff --git a/PaddleCV/image_classification/train.py b/PaddleCV/image_classification/train.py
index e61333fd1ca8aba417c1b8eed8542ff646c89e2a..c4d5f26aa673ec656f98951b65e3871aa3e1af9e 100755
--- a/PaddleCV/image_classification/train.py
+++ b/PaddleCV/image_classification/train.py
@@ -102,13 +102,16 @@ def validate(args,
     test_batch_time_record = []
     test_batch_metrics_record = []
     test_batch_id = 0
-    compiled_program = best_strategy_compiled(
-        args,
-        test_prog,
-        test_fetch_list[0],
-        exe,
-        mode="val",
-        share_prog=train_prog)
+    if int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) > 1:
+        compiled_program = test_prog
+    else:
+        compiled_program = best_strategy_compiled(
+            args,
+            test_prog,
+            test_fetch_list[0],
+            exe,
+            mode="val",
+            share_prog=train_prog)
     for batch in test_iter:
         t1 = time.time()
         test_batch_metrics = exe.run(program=compiled_program,
diff --git a/PaddleCV/image_classification/utils/dist_utils.py b/PaddleCV/image_classification/utils/dist_utils.py
index c98a64dd08ce2ae23725c44bd4bb1de4c0c8eda8..681c260e6e07493b0e8035dfcf7b046d8e2f3ba0 100755
--- a/PaddleCV/image_classification/utils/dist_utils.py
+++ b/PaddleCV/image_classification/utils/dist_utils.py
@@ -85,8 +85,8 @@ def prepare_for_multi_process(exe, build_strategy, train_prog):
     trainer_id = int(os.environ.get('PADDLE_TRAINER_ID', 0))
     num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
     if num_trainers < 2: return
-    logger.info("PADDLE_TRAINERS_NUM", num_trainers)
-    logger.info("PADDLE_TRAINER_ID", trainer_id)
+    logger.info("PADDLE_TRAINERS_NUM %s" % num_trainers)
+    logger.info("PADDLE_TRAINER_ID %s" % trainer_id)
     build_strategy.num_trainers = num_trainers
     build_strategy.trainer_id = trainer_id
     # NOTE(zcd): use multi processes to train the model,