config.yaml 4.6 KB
Newer Older
T
tangwei 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

X
fix  
xjqbest 已提交
15
# workspace: root path, referenced below through the "{workspace}" placeholder
C
Chengmo 已提交
16
workspace: "models/rank/dnn"
C
chengmo 已提交
17

X
fix  
xjqbest 已提交
18
# list of datasets
X
test  
xjqbest 已提交
19
dataset:
C
Chengmo 已提交
20
- name: dataloader_train # name of dataset to distinguish different datasets
X
test  
xjqbest 已提交
21
  batch_size: 2
X
fix  
xjqbest 已提交
22 23
  type: DataLoader # or QueueDataset
  data_path: "{workspace}/data/sample_data/train" # "{workspace}" refers to the workspace key above
X
test  
xjqbest 已提交
24 25
  # 27 space-separated slot names: "click" followed by "1" through "26"
  sparse_slots: "click 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26"
  # NOTE(review): looks like "<name>:<dimension>"; 13 equals dense_input_dim below - confirm
  dense_slots: "dense_var:13"
C
Chengmo 已提交
26 27 28 29 30 31
# Training dataset entry that uses the QueueDataset reader type.
- name: dataset_train # name of dataset to distinguish different datasets
  batch_size: 2
  type: QueueDataset # or DataLoader
  data_path: "{workspace}/data/sample_data/train" # "{workspace}" refers to the workspace key above
  # 27 space-separated slot names: "click" followed by "1" through "26"
  sparse_slots: "click 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26"
  # NOTE(review): looks like "<name>:<dimension>"; 13 equals dense_input_dim below - confirm
  dense_slots: "dense_var:13"
X
fix  
xjqbest 已提交
32
- name: dataset_infer # name of dataset to distinguish different datasets
X
fix  
xjqbest 已提交
33
  batch_size: 2
X
fix  
xjqbest 已提交
34
  type: DataLoader # or QueueDataset
X
fix  
xjqbest 已提交
35
  data_path: "{workspace}/data/sample_data/train"
X
fix  
xjqbest 已提交
36 37
  # 27 space-separated slot names: "click" followed by "1" through "26"
  sparse_slots: "click 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26"
  # NOTE(review): looks like "<name>:<dimension>"; 13 equals dense_input_dim below - confirm
  dense_slots: "dense_var:13"
T
tangwei 已提交
38

X
fix  
xjqbest 已提交
39
# hyper parameters of user-defined network
X
test  
xjqbest 已提交
40
hyper_parameters:
X
fix  
xjqbest 已提交
41
  # optimizer config
X
test  
xjqbest 已提交
42 43 44 45
  # optimizer settings: optimizer class name and its learning rate
  optimizer:
    class: Adam
    learning_rate: 0.001
    # NOTE(review): presumably the parameter-update strategy used by distributed runners - confirm
    strategy: async
X
fix  
xjqbest 已提交
46
  # user-defined <key, value> pairs
X
test  
xjqbest 已提交
47 48 49 50 51
  # 27 equals the number of names listed in each dataset's sparse_slots ("click" plus "1".."26")
  sparse_inputs_slots: 27
  # NOTE(review): presumably the sparse feature vocabulary size - confirm against model.py
  sparse_feature_number: 1000001
  # NOTE(review): presumably the embedding width per sparse feature - confirm against model.py
  sparse_feature_dim: 9
  # 13 equals the value after the colon in dense_slots ("dense_var:13")
  dense_input_dim: 13
  # NOTE(review): presumably the widths of the fully connected layers, in order - confirm against model.py
  fc_sizes: [512, 256, 128, 32]
S
seiriosPlus 已提交
52
  distributed_embedding: 0
T
tangwei 已提交
53

X
fix  
xjqbest 已提交
54
# select runner by name
T
tangwei 已提交
55
mode: [single_cpu_train, single_cpu_infer]
X
fix  
xjqbest 已提交
56 57
# configuration of each runner.
# a runner is a kind of paddle training class that wraps the train/infer process.
X
fix  
xjqbest 已提交
58
runner:
C
Chengmo 已提交
59 60
- name: single_cpu_train
  class: train
X
fix  
xjqbest 已提交
61
  # num of epochs
C
Chengmo 已提交
62
  epochs: 4
X
fix  
xjqbest 已提交
63 64
  # device to run training or inference on
  device: cpu
X
fix  
xjqbest 已提交
65 66
  save_checkpoint_interval: 2 # interval, in epochs, between saved checkpoints
  save_inference_interval: 4 # interval between saved inference models (presumably also in epochs - confirm)
67
  save_checkpoint_path: "increment_dnn" # save checkpoint path
X
fix  
xjqbest 已提交
68
  save_inference_path: "inference" # save inference path
X
fix  
xjqbest 已提交
69 70
  save_inference_feed_varnames: [] # feed variable names for the saved inference model (empty list here)
  save_inference_fetch_varnames: [] # fetch variable names for the saved inference model (empty list here)
X
fix  
xjqbest 已提交
71
  print_interval: 10
T
tangwei 已提交
72 73
  phases: [phase1]

C
Chengmo 已提交
74 75
- name: single_cpu_infer
  class: infer
X
fix  
xjqbest 已提交
76
  # num of epochs
C
Chengmo 已提交
77
  epochs: 1
X
fix  
xjqbest 已提交
78 79
  # device to run training or inference on
  device: cpu
80
  init_model_path: "increment_dnn" # load model path
T
tangwei 已提交
81 82
  phases: [phase2]

C
Chengmo 已提交
83 84 85 86 87 88 89 90 91 92 93
# Cluster training runner on CPU with fleet_mode "ps".
# It is not listed in the top-level `mode` key, which selects runners by name.
- name: ps_cluster
  class: cluster_train
  # number of epochs
  epochs: 2
  # device to run training on
  device: cpu
  # NOTE(review): "ps" presumably means parameter-server mode - confirm
  fleet_mode: ps
  save_checkpoint_interval: 1 # save model interval of epochs
  save_checkpoint_path: "increment_dnn" # save checkpoint path
  init_model_path: "" # load model path (empty string here)
  print_interval: 1
  phases: [phase1] # names of entries under the top-level `phase` key

S
seiriosPlus 已提交
94 95 96 97 98 99 100 101 102 103 104 105
# Cluster training runner on CPU with fleet_mode "ps" and a runner script from the workspace.
# It is not listed in the top-level `mode` key, which selects runners by name.
- name: online_learning_cluster
  class: cluster_train
  # NOTE(review): presumably a user-supplied runner implementation loaded from this file - confirm
  runner_class_path: "{workspace}/online_learning_runner.py"
  # number of epochs
  epochs: 2
  # device to run training on
  device: cpu
  # NOTE(review): "ps" presumably means parameter-server mode - confirm
  fleet_mode: ps
  save_checkpoint_interval: 1 # save model interval of epochs
  save_checkpoint_path: "increment_dnn" # save checkpoint path
  init_model_path: "" # load model path (empty string here)
  print_interval: 1
  phases: [phase1] # names of entries under the top-level `phase` key

C
Chengmo 已提交
106 107 108 109 110 111 112 113 114 115 116
# Cluster training runner on GPU with fleet_mode "collective".
# It is not listed in the top-level `mode` key, which selects runners by name.
- name: collective_cluster
  class: cluster_train
  # number of epochs
  epochs: 2
  # device to run training on
  device: gpu
  # NOTE(review): "collective" presumably means collective (all-reduce style) training - confirm
  fleet_mode: collective
  save_checkpoint_interval: 1 # save model interval of epochs
  save_checkpoint_path: "increment_dnn" # save checkpoint path
  init_model_path: "" # load model path (empty string here)
  print_interval: 1
  phases: [phase1] # names of entries under the top-level `phase` key

L
fix bug  
liuyuhui 已提交
117 118
- name: single_multi_gpu_train
  class: train
L
liuyuhui 已提交
119 120 121
  # num of epochs
  epochs: 1
  # device to run training or infer
L
fix bug  
liuyuhui 已提交
122 123
  device: gpu
  selected_gpus: "0,1" # select multiple GPUs to run training
L
liuyuhui 已提交
124 125 126 127 128 129 130 131 132 133
  save_checkpoint_interval: 1 # save model interval of epochs
  # NOTE(review): this runner sets epochs: 1; if this interval is counted in epochs,
  # an interval of 4 would never be reached - confirm
  save_inference_interval: 4 # save inference
  # NOTE(review): presumably an interval counted in training steps - confirm
  save_step_interval: 1
  save_checkpoint_path: "increment_dnn" # save checkpoint path
  save_inference_path: "inference" # save inference path
  # NOTE(review): presumably the output path for step-interval saves - confirm
  save_step_path: "step_save"
  save_inference_feed_varnames: [] # feed vars of save inference
  save_inference_fetch_varnames: [] # fetch vars of save inference
  print_interval: 1
  phases: [phase1] # names of entries under the top-level `phase` key
X
fix  
xjqbest 已提交
134
# a runner runs all of its phases in each epoch
X
fix  
xjqbest 已提交
135
phase:
X
fix  
xjqbest 已提交
136 137
# phase1 is the phase referenced by the training runners' `phases` lists.
- name: phase1
  model: "{workspace}/model.py" # user-defined model
C
Chengmo 已提交
138
  dataset_name: dataloader_train # select dataset by name
X
fix  
xjqbest 已提交
139
  thread_num: 1
T
tangwei 已提交
140 141 142 143 144

# phase2 is the phase referenced by the single_cpu_infer runner; it reads dataset_infer.
- name: phase2
  model: "{workspace}/model.py" # user-defined model
  dataset_name: dataset_infer # select dataset by name
  thread_num: 1