diff --git a/core/trainers/framework/runner.py b/core/trainers/framework/runner.py index 449e288777b4b1519b19457dbc7a3d1ab68ce93e..839e3ed4d6e04b13f69e6c2cfc463e83aef130f7 100644 --- a/core/trainers/framework/runner.py +++ b/core/trainers/framework/runner.py @@ -212,11 +212,9 @@ class RunnerBase(object): if context["fleet_mode"].upper() == "PS": train_prog = context["model"][model_dict["name"]][ "main_program"] - print("condition 1") else: train_prog = context["model"][model_dict["name"]][ "default_main_program"] - print("condition 2") startup_prog = context["model"][model_dict["name"]][ "startup_program"] with fluid.program_guard(train_prog, startup_prog): diff --git a/models/rank/dnn/config.yaml b/models/rank/dnn/config.yaml index f0c82462485cfda69882894d16cdfadffb872c89..75826684dbc0734e4acf40983bbc837c7b97ac84 100755 --- a/models/rank/dnn/config.yaml +++ b/models/rank/dnn/config.yaml @@ -114,15 +114,13 @@ runner: print_interval: 1 phases: [phase1] -- name: local_ps_train - class: local_cluster_train +- name: single_multi_gpu_train + class: train # num of epochs epochs: 1 # device to run training or infer - device: cpu - selected_gpus: "0" # 选择多卡执行训练 - work_num: 1 - server_num: 1 + device: gpu + selected_gpus: "0,1" # select multiple GPUs for training save_checkpoint_interval: 1 # save model interval of epochs save_inference_interval: 4 # save inference save_step_interval: 1