# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Base location substituted into every "{workspace}" placeholder below.
workspace: "paddlerec.models.treebased.tdm"

# list of dataset
dataset:
  - name: dataset_train  # name of dataset to distinguish different datasets
    batch_size: 2
    type: QueueDataset  # or DataLoader
    data_path: "{workspace}/data/train"
    data_converter: "{workspace}/tdm_reader.py"
  - name: dataset_infer  # name
    batch_size: 1
    type: DataLoader  # or QueueDataset
    data_path: "{workspace}/data/test"
    data_converter: "{workspace}/tdm_evaluate_reader.py"

# hyper parameters of user-defined network
hyper_parameters:
  # optimizer config
  optimizer:
    class: Adam
    learning_rate: 0.001
    strategy: async
  # user-defined key/value pairs
  max_layers: 4
  node_nums: 26
  leaf_node_nums: 13
  layer_node_num_list: [2, 4, 7, 12]
  child_nums: 2
  node_emb_size: 64
  input_emb_size: 768
  neg_sampling_list: [1, 2, 3, 4]
  output_positive: true
  topK: 1
  # NOTE(review): distinct from optimizer.learning_rate above (0.001);
  # confirm which of the two the model actually reads.
  learning_rate: 0.0001
  act: tanh
  tree:
    # For single-machine training, load the tree from numpy only once, save
    # it as Paddle tensors, and warm-start from the Paddle model afterwards.
    # In distributed training, each trainer needs to load the tree
    # independently.
    # For inference, also switch to loading from the Paddle model.
    load_tree_from_numpy: true  # only once
    load_paddle_model: false  # train & infer need
    tree_layer_path: "{workspace}/tree/layer_list.txt"
    tree_travel_path: "{workspace}/tree/travel_list.npy"
    tree_info_path: "{workspace}/tree/tree_info.npy"
    tree_emb_path: "{workspace}/tree/tree_emb.npy"

# select runner by name
mode: runner1

# config of each runner.
# runner is a kind of paddle training class,
# which wraps the train/infer process.
runner:
  # Single-process training runner.
  - name: runner1
    class: single_train
    startup_class_path: "{workspace}/tdm_startup.py"
    # num of epochs
    epochs: 10
    # device to run training or infer
    device: cpu
    save_checkpoint_interval: 2  # save model interval of epochs
    save_inference_interval: 4  # save inference
    save_checkpoint_path: "increment"  # save checkpoint path
    save_inference_path: "inference"  # save inference path
    save_inference_feed_varnames: []  # feed vars of save inference
    save_inference_fetch_varnames: []  # fetch vars of save inference
    init_model_path: ""  # load model path
    print_interval: 10

  # Single-process inference runner; starts from a saved checkpoint.
  - name: runner2
    class: single_infer
    startup_class_path: "{workspace}/tdm_startup.py"
    # num of epochs
    epochs: 1
    # device to run training or infer
    device: cpu
    init_model_path: "increment/0"  # load model path
    print_interval: 1

  # Local-cluster training runner (fleet_mode: ps).
  - name: runner3
    class: local_cluster_train
    startup_class_path: "{workspace}/tdm_startup.py"
    fleet_mode: ps
    # num of epochs
    epochs: 10
    # device to run training or infer
    device: cpu
    save_checkpoint_interval: 2  # save model interval of epochs
    save_inference_interval: 4  # save inference
    save_checkpoint_path: "increment"  # save checkpoint path
    save_inference_path: "inference"  # save inference path
    save_inference_feed_varnames: []  # feed vars of save inference
    save_inference_fetch_varnames: []  # fetch vars of save inference
    init_model_path: "init_model"  # load model path
    print_interval: 10

# runner will run all the phase in each epoch
phase:
  - name: phase1
    model: "{workspace}/model.py"  # user-defined model
    dataset_name: dataset_train  # select dataset by name
    thread_num: 1
  # Disabled inference phase; uncomment to run it against dataset_infer.
  # - name: phase2
  #   model: "{workspace}/model.py"
  #   dataset_name: dataset_infer
  #   thread_num: 2