# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Base directory of this model. Paths below refer to it through the
# "{workspace}" placeholder (presumably expanded by the config loader).
workspace: "models/recall/word2vec"

# List of datasets; each entry is selected elsewhere by its `name`.
dataset:
  - name: dataset_train  # name of dataset to distinguish different datasets
    batch_size: 100
    type: DataLoader  # or QueueDataset
    data_path: "{workspace}/data/train"
    word_count_dict_path: "{workspace}/data/dict/word_count_dict.txt"
    data_converter: "{workspace}/w2v_reader.py"
  - name: dataset_infer  # dataset used for inference
    batch_size: 2000
    type: DataLoader  # or QueueDataset
    data_path: "{workspace}/data/test"
    word_id_dict_path: "{workspace}/data/dict/word_id_dict.txt"
    data_converter: "{workspace}/w2v_evaluate_reader.py"

# Model and optimizer hyper-parameters.
hyper_parameters:
  optimizer:
    learning_rate: 1.0
    decay_steps: 100000
    decay_rate: 0.999
    class: sgd
    strategy: async
  # NOTE(review): presumably the vocabulary size and the embedding width;
  # confirm against the model definition.
  sparse_feature_number: 354051
  sparse_feature_dim: 300
  with_shuffle_batch: false
  neg_num: 5
  window_size: 5

# Select runners by name; each entry must match a `name` under `runner`.
mode: [single_cpu_train, single_cpu_infer]

# Config of each runner.
# A runner is a kind of paddle training class which wraps the
# train/infer process.
runner:
  - name: single_cpu_train
    class: train
    # number of epochs
    epochs: 5
    # device used for training or inference
    device: cpu
    save_checkpoint_interval: 1  # checkpoint save interval, in epochs
    save_inference_interval: 1  # interval for saving the inference model
    save_checkpoint_path: "increment_w2v"  # checkpoint output path
    save_inference_path: "inference_w2v"  # inference model output path
    save_inference_feed_varnames: []  # feed vars of the saved inference model
    save_inference_fetch_varnames: []  # fetch vars of the saved inference model
    init_model_path: ""  # path of the model to load
    print_interval: 1000
    phases: [phase1]  # names of `phase` entries executed by this runner
  - name: single_cpu_infer
    class: infer
    # device used for training or inference
    device: cpu
    init_model_path: "increment_w2v"  # path of the model to load
    print_interval: 1
    phases: [phase2]  # names of `phase` entries executed by this runner

# A runner runs all of its phases in each epoch.
phase:
  - name: phase1
    model: "{workspace}/model.py"  # user-defined model
    dataset_name: dataset_train  # select dataset by name
    thread_num: 5
  - name: phase2
    model: "{workspace}/model.py"  # user-defined model
    dataset_name: dataset_infer  # select dataset by name
    thread_num: 1