diff --git a/core/model.py b/core/model.py
index cfe71f2a5db76b2585b6df23915724909ef68033..5aae5c5256e357f28197eee070a9dc9c130bb173 100755
--- a/core/model.py
+++ b/core/model.py
@@ -59,11 +59,17 @@ class Model(object):
                 dataset = i
                 break
         name = "dataset." + dataset["name"] + "."
-        sparse_slots = envs.get_global_env(name + "sparse_slots")
-        dense_slots = envs.get_global_env(name + "dense_slots")
-        if sparse_slots is not None or dense_slots is not None:
-            sparse_slots = sparse_slots.strip().split(" ")
-            dense_slots = dense_slots.strip().split(" ")
+        sparse_slots = envs.get_global_env(name + "sparse_slots", "").strip()
+        dense_slots = envs.get_global_env(name + "dense_slots", "").strip()
+        if sparse_slots != "" or dense_slots != "":
+            if sparse_slots == "":
+                sparse_slots = []
+            else:
+                sparse_slots = sparse_slots.strip().split(" ")
+            if dense_slots == "":
+                dense_slots = []
+            else:
+                dense_slots = dense_slots.strip().split(" ")
             dense_slots_shape = [[
                 int(j) for j in i.split(":")[1].strip("[]").split(",")
             ] for i in dense_slots]
@@ -151,11 +157,17 @@ class Model(object):
 
     def input_data(self, is_infer=False, **kwargs):
         name = "dataset." + kwargs.get("dataset_name") + "."
-        sparse_slots = envs.get_global_env(name + "sparse_slots")
-        dense_slots = envs.get_global_env(name + "dense_slots")
-        if sparse_slots is not None or dense_slots is not None:
-            sparse_slots = sparse_slots.strip().split(" ")
-            dense_slots = dense_slots.strip().split(" ")
+        sparse_slots = envs.get_global_env(name + "sparse_slots", "").strip()
+        dense_slots = envs.get_global_env(name + "dense_slots", "").strip()
+        if sparse_slots != "" or dense_slots != "":
+            if sparse_slots == "":
+                sparse_slots = []
+            else:
+                sparse_slots = sparse_slots.strip().split(" ")
+            if dense_slots == "":
+                dense_slots = []
+            else:
+                dense_slots = dense_slots.strip().split(" ")
             dense_slots_shape = [[
                 int(j) for j in i.split(":")[1].strip("[]").split(",")
             ] for i in dense_slots]
diff --git a/core/trainers/single_infer.py b/core/trainers/single_infer.py
new file mode 100755
index 0000000000000000000000000000000000000000..7da93bd82660ea202a59eb8c35fc37391dae5c48
--- /dev/null
+++ b/core/trainers/single_infer.py
@@ -0,0 +1,355 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Inference using fluid with one node only.
+""" + +from __future__ import print_function + +import time +import logging +import os +import paddle.fluid as fluid + +from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer +from paddlerec.core.utils import envs +from paddlerec.core.reader import SlotReader +from paddlerec.core.utils import dataloader_instance + +logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s") +logger = logging.getLogger("fluid") +logger.setLevel(logging.INFO) + + +class SingleInfer(TranspileTrainer): + def __init__(self, config=None): + super(TranspileTrainer, self).__init__(config) + self._env = self._config + device = envs.get_global_env("device") + if device == 'gpu': + self._place = fluid.CUDAPlace(0) + elif device == 'cpu': + self._place = fluid.CPUPlace() + self._exe = fluid.Executor(self._place) + self.processor_register() + self._model = {} + self._dataset = {} + envs.set_global_envs(self._config) + envs.update_workspace() + self._runner_name = envs.get_global_env("mode") + device = envs.get_global_env("runner." + self._runner_name + ".device") + if device == 'gpu': + self._place = fluid.CUDAPlace(0) + elif device == 'cpu': + self._place = fluid.CPUPlace() + self._exe = fluid.Executor(self._place) + + def processor_register(self): + self.regist_context_processor('uninit', self.instance) + self.regist_context_processor('init_pass', self.init) + self.regist_context_processor('startup_pass', self.startup) + self.regist_context_processor('train_pass', self.executor_train) + self.regist_context_processor('terminal_pass', self.terminal) + + def instance(self, context): + context['status'] = 'init_pass' + + def _get_dataset(self, dataset_name): + name = "dataset." + dataset_name + "." + thread_num = envs.get_global_env(name + "thread_num") + batch_size = envs.get_global_env(name + "batch_size") + reader_class = envs.get_global_env(name + "data_converter") + abs_dir = os.path.dirname(os.path.abspath(__file__)) + reader = os.path.join(abs_dir, '../utils', 'dataset_instance.py') + sparse_slots = envs.get_global_env(name + "sparse_slots", "").strip() + dense_slots = envs.get_global_env(name + "dense_slots", "").strip() + if sparse_slots == "" and dense_slots == "": + pipe_cmd = "python {} {} {} {}".format(reader, reader_class, + "TRAIN", self._config_yaml) + else: + if sparse_slots is None: + sparse_slots = "#" + if dense_slots is None: + dense_slots = "#" + padding = envs.get_global_env(name + "padding", 0) + pipe_cmd = "python {} {} {} {} {} {} {} {}".format( + reader, "slot", "slot", self._config_yaml, "fake", \ + sparse_slots.replace(" ", "#"), dense_slots.replace(" ", "#"), str(padding)) + + dataset = fluid.DatasetFactory().create_dataset() + dataset.set_batch_size(envs.get_global_env(name + "batch_size")) + dataset.set_pipe_command(pipe_cmd) + train_data_path = envs.get_global_env(name + "data_path") + file_list = [ + os.path.join(train_data_path, x) + for x in os.listdir(train_data_path) + ] + dataset.set_filelist(file_list) + for model_dict in self._env["phase"]: + if model_dict["dataset_name"] == dataset_name: + model = self._model[model_dict["name"]][3] + inputs = model._infer_data_var + dataset.set_use_var(inputs) + break + return dataset + + def _get_dataloader(self, dataset_name, dataloader): + name = "dataset." + dataset_name + "." 
+        thread_num = envs.get_global_env(name + "thread_num")
+        batch_size = envs.get_global_env(name + "batch_size")
+        reader_class = envs.get_global_env(name + "data_converter")
+        abs_dir = os.path.dirname(os.path.abspath(__file__))
+        sparse_slots = envs.get_global_env(name + "sparse_slots", "").strip()
+        dense_slots = envs.get_global_env(name + "dense_slots", "").strip()
+        if sparse_slots == "" and dense_slots == "":
+            reader = dataloader_instance.dataloader_by_name(
+                reader_class, dataset_name, self._config_yaml)
+            reader_class = envs.lazy_instance_by_fliename(reader_class,
+                                                          "TrainReader")
+            reader_ins = reader_class(self._config_yaml)
+        else:
+            reader = dataloader_instance.slotdataloader_by_name(
+                "", dataset_name, self._config_yaml)
+            reader_ins = SlotReader(self._config_yaml)
+        if hasattr(reader_ins, 'generate_batch_from_trainfiles'):
+            dataloader.set_sample_list_generator(reader)
+        else:
+            dataloader.set_sample_generator(reader, batch_size)
+        return dataloader
+
+    def _create_dataset(self, dataset_name):
+        name = "dataset." + dataset_name + "."
+        sparse_slots = envs.get_global_env(name + "sparse_slots")
+        dense_slots = envs.get_global_env(name + "dense_slots")
+        thread_num = envs.get_global_env(name + "thread_num")
+        batch_size = envs.get_global_env(name + "batch_size")
+        type_name = envs.get_global_env(name + "type")
+        if envs.get_platform() != "LINUX":
+            print("platform ", envs.get_platform(),
+                  " change reader to DataLoader")
+            type_name = "DataLoader"
+        padding = 0
+
+        if type_name == "DataLoader":
+            return None
+        else:
+            return self._get_dataset(dataset_name)
+
+    def init(self, context):
+        for model_dict in self._env["phase"]:
+            self._model[model_dict["name"]] = [None] * 5
+            train_program = fluid.Program()
+            startup_program = fluid.Program()
+            scope = fluid.Scope()
+            dataset_name = model_dict["dataset_name"]
+            opt_name = envs.get_global_env("hyper_parameters.optimizer.class")
+            opt_lr = envs.get_global_env(
+                "hyper_parameters.optimizer.learning_rate")
+            opt_strategy = envs.get_global_env(
+                "hyper_parameters.optimizer.strategy")
+            with fluid.program_guard(train_program, startup_program):
+                with fluid.unique_name.guard():
+                    with fluid.scope_guard(scope):
+                        model_path = model_dict["model"].replace(
+                            "{workspace}",
+                            envs.path_adapter(self._env["workspace"]))
+                        model = envs.lazy_instance_by_fliename(
+                            model_path, "Model")(self._env)
+                        model._infer_data_var = model.input_data(
+                            dataset_name=model_dict["dataset_name"])
+                        if envs.get_global_env("dataset." + dataset_name +
+                                               ".type") == "DataLoader":
+                            model._init_dataloader(is_infer=True)
+                            self._get_dataloader(dataset_name,
+                                                 model._data_loader)
+                        model.net(model._infer_data_var, True)
+            self._model[model_dict["name"]][0] = train_program
+            self._model[model_dict["name"]][1] = startup_program
+            self._model[model_dict["name"]][2] = scope
+            self._model[model_dict["name"]][3] = model
+            self._model[model_dict["name"]][4] = train_program.clone()
+
+        for dataset in self._env["dataset"]:
+            if dataset["type"] != "DataLoader":
+                self._dataset[dataset["name"]] = self._create_dataset(dataset[
+                    "name"])
+
+        context['status'] = 'startup_pass'
+
+    def startup(self, context):
+        for model_dict in self._env["phase"]:
+            with fluid.scope_guard(self._model[model_dict["name"]][2]):
+                self._exe.run(self._model[model_dict["name"]][1])
+        context['status'] = 'train_pass'
+
+    def executor_train(self, context):
+        epochs = int(
+            envs.get_global_env("runner." + self._runner_name + ".epochs"))
+        for j in range(epochs):
+            for model_dict in self._env["phase"]:
+                if j == 0:
+                    with fluid.scope_guard(self._model[model_dict["name"]][2]):
+                        train_prog = self._model[model_dict["name"]][0]
+                        startup_prog = self._model[model_dict["name"]][1]
+                        with fluid.program_guard(train_prog, startup_prog):
+                            self.load()
+                reader_name = model_dict["dataset_name"]
+                name = "dataset." + reader_name + "."
+                begin_time = time.time()
+                if envs.get_global_env(name + "type") == "DataLoader":
+                    self._executor_dataloader_train(model_dict)
+                else:
+                    self._executor_dataset_train(model_dict)
+                with fluid.scope_guard(self._model[model_dict["name"]][2]):
+                    train_prog = self._model[model_dict["name"]][4]
+                    startup_prog = self._model[model_dict["name"]][1]
+                    with fluid.program_guard(train_prog, startup_prog):
+                        self.save(j)
+                end_time = time.time()
+                seconds = end_time - begin_time
+                print("epoch {} done, time elapsed: {}".format(j, seconds))
+        context['status'] = "terminal_pass"
+
+    def _executor_dataset_train(self, model_dict):
+        reader_name = model_dict["dataset_name"]
+        model_name = model_dict["name"]
+        model_class = self._model[model_name][3]
+        fetch_vars = []
+        fetch_alias = []
+        fetch_period = int(
+            envs.get_global_env("runner." + self._runner_name +
+                                ".fetch_period", 20))
+        metrics = model_class.get_infer_results()
+        if metrics:
+            fetch_vars = metrics.values()
+            fetch_alias = metrics.keys()
+        scope = self._model[model_name][2]
+        program = self._model[model_name][0]
+        reader = self._dataset[reader_name]
+        with fluid.scope_guard(scope):
+            self._exe.infer_from_dataset(
+                program=program,
+                dataset=reader,
+                fetch_list=fetch_vars,
+                fetch_info=fetch_alias,
+                print_period=fetch_period)
+
+    def _executor_dataloader_train(self, model_dict):
+        reader_name = model_dict["dataset_name"]
+        model_name = model_dict["name"]
+        model_class = self._model[model_name][3]
+        program = self._model[model_name][0].clone()
+        fetch_vars = []
+        fetch_alias = []
+        metrics = model_class.get_infer_results()
+        if metrics:
+            fetch_vars = metrics.values()
+            fetch_alias = metrics.keys()
+        metrics_varnames = []
+        metrics_format = []
+        fetch_period = int(
+            envs.get_global_env("runner." + self._runner_name +
+                                ".fetch_period", 20))
+        metrics_format.append("{}: {{}}".format("batch"))
+        for name, var in metrics.items():
+            metrics_varnames.append(var.name)
+            metrics_format.append("{}: {{}}".format(name))
+        metrics_format = ", ".join(metrics_format)
+
+        reader = self._model[model_name][3]._data_loader
+        reader.start()
+        batch_id = 0
+        scope = self._model[model_name][2]
+        with fluid.scope_guard(scope):
+            try:
+                while True:
+                    metrics_rets = self._exe.run(program=program,
+                                                 fetch_list=metrics_varnames)
+                    metrics = [batch_id]
+                    metrics.extend(metrics_rets)
+
+                    if batch_id % fetch_period == 0 and batch_id != 0:
+                        print(metrics_format.format(*metrics))
+                    batch_id += 1
+            except fluid.core.EOFException:
+                reader.reset()
+
+    def terminal(self, context):
+        context['is_exit'] = True
+
+    def load(self, is_fleet=False):
+        name = "runner." + self._runner_name + "."
+        dirname = envs.get_global_env(name + "init_model_path", None)
+        if dirname is None or dirname == "":
+            return
+        print("single_infer going to load ", dirname)
+        if is_fleet:
+            fleet.load_persistables(self._exe, dirname)
+        else:
+            fluid.io.load_persistables(self._exe, dirname)
+
+    def save(self, epoch_id, is_fleet=False):
+        def need_save(epoch_id, epoch_interval, is_last=False):
+            if is_last:
+                return True
+            if epoch_id == -1:
+                return False
+
+            return epoch_id % epoch_interval == 0
+
+        def save_inference_model():
+            name = "runner." + self._runner_name + "."
+            save_interval = int(
+                envs.get_global_env(name + "save_inference_interval", -1))
+            if not need_save(epoch_id, save_interval, False):
+                return
+            feed_varnames = envs.get_global_env(
+                name + "save_inference_feed_varnames", None)
+            fetch_varnames = envs.get_global_env(
+                name + "save_inference_fetch_varnames", None)
+            if feed_varnames is None or fetch_varnames is None or feed_varnames == "":
+                return
+            fetch_vars = [
+                fluid.default_main_program().global_block().vars[varname]
+                for varname in fetch_varnames
+            ]
+            dirname = envs.get_global_env(name + "save_inference_path", None)
+
+            assert dirname is not None
+            dirname = os.path.join(dirname, str(epoch_id))
+
+            if is_fleet:
+                fleet.save_inference_model(self._exe, dirname, feed_varnames,
+                                           fetch_vars)
+            else:
+                fluid.io.save_inference_model(dirname, feed_varnames,
+                                              fetch_vars, self._exe)
+
+        def save_persistables():
+            name = "runner." + self._runner_name + "."
+            save_interval = int(
+                envs.get_global_env(name + "save_checkpoint_interval", -1))
+            if not need_save(epoch_id, save_interval, False):
+                return
+            dirname = envs.get_global_env(name + "save_checkpoint_path", None)
+            if dirname is None or dirname == "":
+                return
+            dirname = os.path.join(dirname, str(epoch_id))
+            if is_fleet:
+                fleet.save_persistables(self._exe, dirname)
+            else:
+                fluid.io.save_persistables(self._exe, dirname)
+
+        save_persistables()
+        save_inference_model()
diff --git a/core/trainers/single_trainer.py b/core/trainers/single_trainer.py
index 73d82b1ae0a0231ab3657e285f805c84e49aab91..264625526fe4dbe9630612246256ee87d7428dd8 100755
--- a/core/trainers/single_trainer.py
+++ b/core/trainers/single_trainer.py
@@ -36,18 +36,18 @@ class SingleTrainer(TranspileTrainer):
     def __init__(self, config=None):
         super(TranspileTrainer, self).__init__(config)
         self._env = self._config
-        device = envs.get_global_env("device")
-        if device == 'gpu':
-            self._place = fluid.CUDAPlace(0)
-        elif device == 'cpu':
-            self._place = fluid.CPUPlace()
-        self._exe = fluid.Executor(self._place)
         self.processor_register()
         self._model = {}
         self._dataset = {}
         envs.set_global_envs(self._config)
         envs.update_workspace()
         self._runner_name = envs.get_global_env("mode")
+        device = envs.get_global_env("runner." + self._runner_name + ".device")
+        if device == 'gpu':
+            self._place = fluid.CUDAPlace(0)
+        elif device == 'cpu':
+            self._place = fluid.CPUPlace()
+        self._exe = fluid.Executor(self._place)
 
     def processor_register(self):
         self.regist_context_processor('uninit', self.instance)
@@ -61,21 +61,20 @@ class SingleTrainer(TranspileTrainer):
 
     def _get_dataset(self, dataset_name):
         name = "dataset." + dataset_name + "."
-        sparse_slots = envs.get_global_env(name + "sparse_slots")
-        dense_slots = envs.get_global_env(name + "dense_slots")
         thread_num = envs.get_global_env(name + "thread_num")
         batch_size = envs.get_global_env(name + "batch_size")
         reader_class = envs.get_global_env(name + "data_converter")
         abs_dir = os.path.dirname(os.path.abspath(__file__))
         reader = os.path.join(abs_dir, '../utils', 'dataset_instance.py')
-
-        if sparse_slots is None and dense_slots is None:
+        sparse_slots = envs.get_global_env(name + "sparse_slots", "").strip()
+        dense_slots = envs.get_global_env(name + "dense_slots", "").strip()
+        if sparse_slots == "" and dense_slots == "":
             pipe_cmd = "python {} {} {} {}".format(reader, reader_class,
                                                    "TRAIN", self._config_yaml)
         else:
-            if sparse_slots is None:
+            if sparse_slots == "":
                 sparse_slots = "#"
-            if dense_slots is None:
+            if dense_slots == "":
                 dense_slots = "#"
             padding = envs.get_global_env(name + "padding", 0)
             pipe_cmd = "python {} {} {} {} {} {} {} {}".format(
@@ -101,13 +100,13 @@ class SingleTrainer(TranspileTrainer):
 
     def _get_dataloader(self, dataset_name, dataloader):
         name = "dataset." + dataset_name + "."
-        sparse_slots = envs.get_global_env(name + "sparse_slots")
-        dense_slots = envs.get_global_env(name + "dense_slots")
+        sparse_slots = envs.get_global_env(name + "sparse_slots", "").strip()
+        dense_slots = envs.get_global_env(name + "dense_slots", "").strip()
         thread_num = envs.get_global_env(name + "thread_num")
         batch_size = envs.get_global_env(name + "batch_size")
         reader_class = envs.get_global_env(name + "data_converter")
         abs_dir = os.path.dirname(os.path.abspath(__file__))
-        if sparse_slots is None and dense_slots is None:
+        if sparse_slots == "" and dense_slots == "":
             reader = dataloader_instance.dataloader_by_name(
                 reader_class, dataset_name, self._config_yaml)
             reader_class = envs.lazy_instance_by_fliename(reader_class,
@@ -125,8 +124,8 @@ class SingleTrainer(TranspileTrainer):
 
     def _create_dataset(self, dataset_name):
         name = "dataset." + dataset_name + "."
-        sparse_slots = envs.get_global_env(name + "sparse_slots")
-        dense_slots = envs.get_global_env(name + "dense_slots")
+        sparse_slots = envs.get_global_env(name + "sparse_slots", "").strip()
+        dense_slots = envs.get_global_env(name + "dense_slots", "").strip()
         thread_num = envs.get_global_env(name + "thread_num")
         batch_size = envs.get_global_env(name + "batch_size")
         type_name = envs.get_global_env(name + "type")
@@ -192,7 +191,8 @@ class SingleTrainer(TranspileTrainer):
         context['status'] = 'train_pass'
 
     def executor_train(self, context):
-        epochs = int(self._env["epochs"])
+        epochs = int(
+            envs.get_global_env("runner." + self._runner_name + ".epochs"))
         for j in range(epochs):
             for model_dict in self._env["phase"]:
                 if j == 0:
@@ -224,7 +224,9 @@ class SingleTrainer(TranspileTrainer):
         model_class = self._model[model_name][3]
         fetch_vars = []
         fetch_alias = []
-        fetch_period = 20
+        fetch_period = int(
+            envs.get_global_env("runner." + self._runner_name +
+                                ".fetch_period", 20))
         metrics = model_class.get_metrics()
         if metrics:
             fetch_vars = metrics.values()
@@ -249,14 +251,15 @@ class SingleTrainer(TranspileTrainer):
             loss_name=model_class.get_avg_cost().name)
         fetch_vars = []
         fetch_alias = []
-        fetch_period = 20
+        fetch_period = int(
+            envs.get_global_env("runner." + self._runner_name +
+                                ".fetch_period", 20))
         metrics = model_class.get_metrics()
         if metrics:
             fetch_vars = metrics.values()
             fetch_alias = metrics.keys()
         metrics_varnames = []
         metrics_format = []
-        fetch_period = 20
         metrics_format.append("{}: {{}}".format("batch"))
         for name, var in metrics.items():
             metrics_varnames.append(var.name)
@@ -287,7 +290,7 @@ class SingleTrainer(TranspileTrainer):
     def load(self, is_fleet=False):
         dirname = envs.get_global_env(
             "runner." + self._runner_name + ".init_model_path", None)
-        if dirname is None:
+        if dirname is None or dirname == "":
             return
         print("going to load ", dirname)
         if is_fleet:
@@ -311,10 +314,11 @@ class SingleTrainer(TranspileTrainer):
             if not need_save(epoch_id, save_interval, False):
                 return
             feed_varnames = envs.get_global_env(
-                name + "save_inference_feed_varnames", None)
+                name + "save_inference_feed_varnames", [])
             fetch_varnames = envs.get_global_env(
-                name + "save_inference_fetch_varnames", None)
-            if feed_varnames is None or fetch_varnames is None or feed_varnames == "":
+                name + "save_inference_fetch_varnames", [])
+            if feed_varnames is None or fetch_varnames is None or feed_varnames == "" or fetch_varnames == "" or \
+                len(feed_varnames) == 0 or len(fetch_varnames) == 0:
                 return
             fetch_vars = [
                 fluid.default_main_program().global_block().vars[varname]
diff --git a/core/utils/dataloader_instance.py b/core/utils/dataloader_instance.py
index 23cdfdc5761eb0ba8e3bce65c3a406e89fb46267..a26e2df20876e48bec5a1f30492987605613ef8a 100755
--- a/core/utils/dataloader_instance.py
+++ b/core/utils/dataloader_instance.py
@@ -68,8 +68,12 @@ def slotdataloader_by_name(readerclass, dataset_name, yaml_file):
         data_path = os.path.join(package_base, data_path.split("::")[1])
 
     files = [str(data_path) + "/%s" % x for x in os.listdir(data_path)]
-    sparse = get_global_env(name + "sparse_slots")
-    dense = get_global_env(name + "dense_slots")
+    sparse = get_global_env(name + "sparse_slots", "#")
+    if sparse == "":
+        sparse = "#"
+    dense = get_global_env(name + "dense_slots", "#")
+    if dense == "":
+        dense = "#"
     padding = get_global_env(name + "padding", 0)
     reader = SlotReader(yaml_file)
     reader.init(sparse, dense, int(padding))
@@ -158,8 +162,12 @@ def slotdataloader(readerclass, train, yaml_file):
 
         files = [str(data_path) + "/%s" % x for x in os.listdir(data_path)]
 
-        sparse = get_global_env("sparse_slots", None, namespace)
-        dense = get_global_env("dense_slots", None, namespace)
+        sparse = get_global_env("sparse_slots", "#", namespace)
+        if sparse == "":
+            sparse = "#"
+        dense = get_global_env("dense_slots", "#", namespace)
+        if dense == "":
+            dense = "#"
         padding = get_global_env("padding", 0, namespace)
         reader = SlotReader(yaml_file)
         reader.init(sparse, dense, int(padding))
diff --git a/models/rank/dnn/config.yaml b/models/rank/dnn/config.yaml
index ae06cf3f19fa473759fbf9f0f9fa7c523b0fbd7e..57bb81d56721ff875c8a9a747ed56ac100582ec2 100755
--- a/models/rank/dnn/config.yaml
+++ b/models/rank/dnn/config.yaml
@@ -12,65 +12,72 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# 轮数
-epochs: 10
-# 设备
-device: cpu
-# 工作目录
+# workspace
 workspace: "paddlerec.models.rank.dnn"
 
-# dataset列表
+# list of dataset
 dataset:
-- name: dataset_train # 名字,用来区分不同的dataset
+- name: dataset_train # name of dataset to distinguish different datasets
   batch_size: 2
-  type: DataLoader # 或者QueueDataset
-  data_path: "{workspace}/data/sample_data/train" # 数据路径
+  type: DataLoader # or QueueDataset
+  data_path: "{workspace}/data/sample_data/train"
   sparse_slots: "click 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26"
   dense_slots: "dense_var:13"
-- name: dataset_infer # 名字,用来区分不同的dataset
+- name: dataset_infer # name
   batch_size: 2
-  type: DataLoader # 或者QueueDataset
-  data_path: "{workspace}/data/sample_data/test" # 数据路径
+  type: DataLoader # or QueueDataset
+  data_path: "{workspace}/data/sample_data/train"
   sparse_slots: "click 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26"
   dense_slots: "dense_var:13"
 
-# 超参数
+# hyper parameters of user-defined network
 hyper_parameters:
-  #优化器
+  # optimizer config
   optimizer:
     class: Adam
    learning_rate: 0.001
     strategy: async
-  # 用户自定义
+  # user-defined pairs
   sparse_inputs_slots: 27
   sparse_feature_number: 1000001
   sparse_feature_dim: 9
   dense_input_dim: 13
   fc_sizes: [512, 256, 128, 32]
 
+# select runner by name
 mode: runner1
-# runner配置
+# config of each runner.
+# runner is a kind of paddle training class, which wraps the train/infer process.
 runner:
-  - name: runner1
-    class: single_train
-    save_checkpoint_interval: 2 # 保存模型
-    save_inference_interval: 4 # 保存预测模型
-    save_checkpoint_path: "increment" # 保存模型路径
-    save_inference_path: "inference" # 保存预测模型路径
-    #save_inference_feed_varnames: [] # 预测模型feed vars
-    #save_inference_fetch_varnames: [] # 预测模型 fetch vars
-    #init_model_path: "xxxx" # 加载模型
-  - name: runner2
-    class: single_infer
-    init_model_path: "increment/0" # 加载模型
+- name: runner1
+  class: single_train
+  # num of epochs
+  epochs: 10
+  # device to run training or infer
+  device: cpu
+  save_checkpoint_interval: 2 # save model interval of epochs
+  save_inference_interval: 4 # save inference
+  save_checkpoint_path: "increment" # save checkpoint path
+  save_inference_path: "inference" # save inference path
+  save_inference_feed_varnames: [] # feed vars of save inference
+  save_inference_fetch_varnames: [] # fetch vars of save inference
+  init_model_path: "" # load model path
+  fetch_period: 10
+- name: runner2
+  class: single_infer
+  # num of epochs
+  epochs: 10
+  # device to run training or infer
+  device: cpu
+  init_model_path: "increment/0" # load model path
 
-# 执行器,每轮要跑的所有阶段
+# runner will run all the phase in each epoch
 phase:
-  - name: phase1
-    model: "{workspace}/model.py" # 模型路径
-    dataset_name: dataset_train # 名字,用来区分不同的阶段
-    thread_num: 1 # 线程数
-#  - name: phase2
-#    model: "{workspace}/model.py" # 模型路径
-#    dataset_name: dataset_infer # 名字,用来区分不同的阶段
-#    thread_num: 1 # 线程数
+- name: phase1
+  model: "{workspace}/model.py" # user-defined model
+  dataset_name: dataset_train # select dataset by name
+  thread_num: 1
+#- name: phase2
+#  model: "{workspace}/model.py" # user-defined model
+#  dataset_name: dataset_infer # select dataset by name
+#  thread_num: 1
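Note (illustration only, not part of the patch): a minimal standalone sketch of the slot parsing these changes converge on in core/model.py, with envs.get_global_env stood in by a plain dict lookup; the helper name parse_slots is hypothetical.

    def parse_slots(config, name):
        # Missing keys now default to "" rather than None, so .strip() is always safe.
        sparse_slots = config.get(name + "sparse_slots", "").strip()
        dense_slots = config.get(name + "dense_slots", "").strip()
        # An empty string yields an empty list instead of [''], mirroring the patched branches.
        sparse = sparse_slots.split(" ") if sparse_slots != "" else []
        dense = dense_slots.split(" ") if dense_slots != "" else []
        # "dense_var:13" -> [13], the shape list built from dense_slots.
        dense_shapes = [[int(j) for j in d.split(":")[1].strip("[]").split(",")]
                        for d in dense]
        return sparse, dense, dense_shapes

    # Example with the dnn config above (keys abbreviated):
    # parse_slots({"dataset.dataset_train.dense_slots": "dense_var:13"},
    #             "dataset.dataset_train.")  -> ([], ["dense_var:13"], [[13]])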