From 8334c08d2d5c4e1b836bd8db82129ec1230a69a9 Mon Sep 17 00:00:00 2001 From: tangwei Date: Wed, 15 Apr 2020 15:15:34 +0800 Subject: [PATCH] structure rebuild --- fleetrec/core/trainer/details/local_engine.py | 6 +- .../built_in/cluster_training_local.yaml | 10 +++ .../built_in/cluster_training_mpi.yaml | 10 +++ .../cluster_training_user_define.yaml | 2 + .../ctr-dnn_train.yaml} | 7 ++- .../examples/{train.py => built_in/run.py} | 2 +- .../examples/built_in/single_training.yaml | 2 + .../{ => built_in}/user_define_trainer.py | 1 + fleetrec/examples/ctr-dnn_train_cluster.yaml | 62 ------------------- .../examples/ctr-dnn_train_userdefine.yaml | 52 ---------------- 10 files changed, 34 insertions(+), 120 deletions(-) create mode 100644 fleetrec/examples/built_in/cluster_training_local.yaml create mode 100644 fleetrec/examples/built_in/cluster_training_mpi.yaml create mode 100644 fleetrec/examples/built_in/cluster_training_user_define.yaml rename fleetrec/examples/{ctr-dnn_train_single.yaml => built_in/ctr-dnn_train.yaml} (88%) rename fleetrec/examples/{train.py => built_in/run.py} (94%) create mode 100644 fleetrec/examples/built_in/single_training.yaml rename fleetrec/examples/{ => built_in}/user_define_trainer.py (96%) delete mode 100644 fleetrec/examples/ctr-dnn_train_cluster.yaml delete mode 100644 fleetrec/examples/ctr-dnn_train_userdefine.yaml diff --git a/fleetrec/core/trainer/details/local_engine.py b/fleetrec/core/trainer/details/local_engine.py index 9a1c4ba0..1610c530 100644 --- a/fleetrec/core/trainer/details/local_engine.py +++ b/fleetrec/core/trainer/details/local_engine.py @@ -38,7 +38,7 @@ def start_procs(args, yaml): user_endpoints_ips = [x.split(":")[0] for x in user_endpoints.split(",")] user_endpoints_port = [x.split(":")[1] for x in user_endpoints.split(",")] - factory = "fleetrec.trainer.factory" + factory = "fleetrec.core.factory" cmd = [sys.executable, "-u", "-m", factory, yaml] for i in range(server_num): @@ -91,11 +91,11 @@ def start_procs(args, yaml): procs[i].terminate() print("all parameter server are killed", file=sys.stderr) -class Launch(): + +class Launch: def __init__(self, envs, trainer): self.envs = envs self.trainer = trainer def run(self): start_procs(self.envs, self.trainer) - diff --git a/fleetrec/examples/built_in/cluster_training_local.yaml b/fleetrec/examples/built_in/cluster_training_local.yaml new file mode 100644 index 00000000..bf878e8c --- /dev/null +++ b/fleetrec/examples/built_in/cluster_training_local.yaml @@ -0,0 +1,10 @@ + +trainer: "LocalClusterTraining" + +pserver_num: 2 +trainer_num: 2 +start_port: 36001 +log_dirname: "logs" + +strategy: + mode: "async" diff --git a/fleetrec/examples/built_in/cluster_training_mpi.yaml b/fleetrec/examples/built_in/cluster_training_mpi.yaml new file mode 100644 index 00000000..19974f40 --- /dev/null +++ b/fleetrec/examples/built_in/cluster_training_mpi.yaml @@ -0,0 +1,10 @@ + +trainer: "MPIClusterTraining" + +pserver_num: 2 +trainer_num: 2 +start_port: 36001 +log_dirname: "logs" + +strategy: + mode: "async" diff --git a/fleetrec/examples/built_in/cluster_training_user_define.yaml b/fleetrec/examples/built_in/cluster_training_user_define.yaml new file mode 100644 index 00000000..973bada5 --- /dev/null +++ b/fleetrec/examples/built_in/cluster_training_user_define.yaml @@ -0,0 +1,2 @@ +trainer: "UserDefineTrainer" +location: "/root/FleetRec/fleetrec/examples/user_define_trainer.py" diff --git a/fleetrec/examples/ctr-dnn_train_single.yaml b/fleetrec/examples/built_in/ctr-dnn_train.yaml similarity index 88% rename from fleetrec/examples/ctr-dnn_train_single.yaml rename to fleetrec/examples/built_in/ctr-dnn_train.yaml index 50d25cad..5c4af64a 100644 --- a/fleetrec/examples/ctr-dnn_train_single.yaml +++ b/fleetrec/examples/built_in/ctr-dnn_train.yaml @@ -15,9 +15,10 @@ train: threads: 12 epochs: 10 - trainer: "SingleTraining" + trainer: "single_training.yaml" reader: + mode: "dataset" batch_size: 2 class: "fleetrec.models.ctr_dnn.data_generator" train_data_path: "/root/FleetRec/fleetrec/models/ctr_dnn/data/train/" @@ -29,7 +30,7 @@ train: sparse_feature_number: 1000001 sparse_feature_dim: 8 dense_input_dim: 13 - fc_sizes: [512, 256, 128, 32] + fc_sizes: [512, 256, 128, 32] learning_rate: 0.001 save: @@ -40,6 +41,8 @@ train: inference: dirname: "models_for_inference" epoch_interval: 4 + feed_varnames: ["C1", "C2", "C3"] + fetch_varnames: "predict" save_last: True evaluate: diff --git a/fleetrec/examples/train.py b/fleetrec/examples/built_in/run.py similarity index 94% rename from fleetrec/examples/train.py rename to fleetrec/examples/built_in/run.py index 7a72fb78..a1c4281d 100644 --- a/fleetrec/examples/train.py +++ b/fleetrec/examples/built_in/run.py @@ -14,7 +14,7 @@ import os -from fleetrec.trainer.factory import TrainerFactory +from fleetrec.core.factory import TrainerFactory if __name__ == "__main__": diff --git a/fleetrec/examples/built_in/single_training.yaml b/fleetrec/examples/built_in/single_training.yaml new file mode 100644 index 00000000..8fb437ae --- /dev/null +++ b/fleetrec/examples/built_in/single_training.yaml @@ -0,0 +1,2 @@ + +trainer: "SingleTraining" \ No newline at end of file diff --git a/fleetrec/examples/user_define_trainer.py b/fleetrec/examples/built_in/user_define_trainer.py similarity index 96% rename from fleetrec/examples/user_define_trainer.py rename to fleetrec/examples/built_in/user_define_trainer.py index 549d42b8..70841f9a 100644 --- a/fleetrec/examples/user_define_trainer.py +++ b/fleetrec/examples/built_in/user_define_trainer.py @@ -21,6 +21,7 @@ from fleetrec.utils import envs class UserDefineTrainer(TranspileTrainer): def __init__(self, config=None): TranspileTrainer.__init__(self, config) + print("this is a demo about how to use user define trainer in fleet-rec") def processor_register(self): self.regist_context_processor('uninit', self.instance) diff --git a/fleetrec/examples/ctr-dnn_train_cluster.yaml b/fleetrec/examples/ctr-dnn_train_cluster.yaml deleted file mode 100644 index f4c8f2b1..00000000 --- a/fleetrec/examples/ctr-dnn_train_cluster.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -train: - threads: 12 - epochs: 10 - trainer: "ClusterTraining" - container: "local" - - pserver_num: 2 - trainer_num: 2 - start_port: 36001 - log_dirname: "logs" - - strategy: - mode: "async" - - reader: - mode: "dataset" - batch_size: 2 - pipe_command: "python /paddle/eleps/fleetrec/models/ctr_dnn/dataset.py" - train_data_path: "/paddle/eleps/fleetrec/models/ctr_dnn/data/train" - - model: - models: "fleetrec.models.ctr_dnn.model" - hyper_parameters: - sparse_inputs_slots: 27 - sparse_feature_number: 1000001 - sparse_feature_dim: 8 - dense_input_dim: 13 - fc_sizes: [512, 256, 128, 32] - learning_rate: 0.001 - - save: - increment: - dirname: "models_for_increment" - epoch_interval: 2 - save_last: True - inference: - dirname: "models_for_inference" - epoch_interval: 4 - feed_varnames: ["C1", "C2", "C3"] - fetch_varnames: "predict" - save_last: True - -evaluate: - batch_size: 32 - train_thread_num: 12 - reader: "reader.py" - - diff --git a/fleetrec/examples/ctr-dnn_train_userdefine.yaml b/fleetrec/examples/ctr-dnn_train_userdefine.yaml deleted file mode 100644 index 423efedc..00000000 --- a/fleetrec/examples/ctr-dnn_train_userdefine.yaml +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -train: - threads: 12 - epochs: 10 - trainer: "UserDefineTrainer" - location: "/root/FleetRec/fleetrec/examples/user_define_trainer.py" - - reader: - batch_size: 2 - class: "fleetrec.models.ctr_dnn.data_generator" - train_data_path: "/root/FleetRec/fleetrec/models/ctr_dnn/data/train/" - - model: - models: "fleetrec.models.ctr_dnn.model" - hyper_parameters: - sparse_inputs_slots: 27 - sparse_feature_number: 1000001 - sparse_feature_dim: 8 - dense_input_dim: 13 - fc_sizes: [512, 256, 128, 32] - learning_rate: 0.001 - - save: - increment: - dirname: "models_for_increment" - epoch_interval: 2 - save_last: True - inference: - dirname: "models_for_inference" - epoch_interval: 4 - save_last: True - -evaluate: - batch_size: 32 - train_thread_num: 12 - reader: "reader.py" - - -- GitLab