# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# workspace
workspace: "models/rank/dnn"

# list of datasets
dataset:
- name: dataloader_train # name of dataset to distinguish different datasets
  batch_size: 2
  type: DataLoader # or QueueDataset
  data_path: "{workspace}/data/sample_data/train"
  sparse_slots: "click 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26"
  dense_slots: "dense_var:13"
- name: dataset_train # name of dataset to distinguish different datasets
  batch_size: 2
  type: QueueDataset # or DataLoader
  data_path: "{workspace}/data/sample_data/train"
  sparse_slots: "click 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26"
  dense_slots: "dense_var:13"
- name: dataset_infer # name of dataset to distinguish different datasets
  batch_size: 2
  type: DataLoader # or QueueDataset
  data_path: "{workspace}/data/sample_data/train"
  sparse_slots: "click 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26"
  dense_slots: "dense_var:13"

# hyper parameters of user-defined network
hyper_parameters:
  # optimizer config
  optimizer:
    class: Adam
    learning_rate: 0.001
    strategy: async
  # user-defined <key, value> pairs
  sparse_inputs_slots: 27
  sparse_feature_number: 1000001
  sparse_feature_dim: 9
  dense_input_dim: 13
  fc_sizes: [512, 256, 128, 32]

# select runners by name
mode: [single_cpu_train, single_cpu_infer]
# config of each runner.
# a runner is a kind of paddle training class, which wraps the train/infer process.
runner:
- name: single_cpu_train
  class: train
  # num of epochs
  epochs: 4
  # device to run training or infer
  device: cpu
  save_checkpoint_interval: 2 # save model interval of epochs
  save_inference_interval: 4 # save inference model interval of epochs
  save_checkpoint_path: "increment_dnn" # save checkpoint path
  save_inference_path: "inference" # save inference path
  save_inference_feed_varnames: [] # feed vars of save inference
  save_inference_fetch_varnames: [] # fetch vars of save inference
  print_interval: 10
  phases: [phase1]
- name: single_cpu_infer
  class: infer
  # num of epochs
  epochs: 1
  # device to run training or infer
  device: cpu
  init_model_path: "increment_dnn" # load model path
  phases: [phase2]
- name: ps_cluster
  class: cluster_train
  epochs: 2
  device: cpu
  fleet_mode: ps
  save_checkpoint_interval: 1 # save model interval of epochs
  save_checkpoint_path: "increment_dnn" # save checkpoint path
  init_model_path: "" # load model path
  print_interval: 1
  phases: [phase1]
- name: collective_cluster
  class: cluster_train
  epochs: 2
  device: gpu
  fleet_mode: collective
  save_checkpoint_interval: 1 # save model interval of epochs
  save_checkpoint_path: "increment_dnn" # save checkpoint path
  init_model_path: "" # load model path
  print_interval: 1
  phases: [phase1]

# the runner will run all of its phases in each epoch
phase:
- name: phase1
  model: "{workspace}/model.py" # user-defined model
  dataset_name: dataloader_train # select dataset by name
  thread_num: 1
- name: phase2
  model: "{workspace}/model.py" # user-defined model
  dataset_name: dataset_infer # select dataset by name
  thread_num: 1
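
# Usage sketch (not part of the original config; the exact entry point below is
# an assumption based on common PaddleRec conventions, adjust the path to where
# this file actually lives): a config like this is typically launched through
# the PaddleRec runner, which reads `mode` above to decide which runners to
# execute (here, single_cpu_train followed by single_cpu_infer), e.g.:
#
#   python -m paddlerec.run -m models/rank/dnn/config.yaml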