提交 8334c08d 编写于 作者: T tangwei

structure rebuild

上级 baf8765b
......@@ -38,7 +38,7 @@ def start_procs(args, yaml):
user_endpoints_ips = [x.split(":")[0] for x in user_endpoints.split(",")]
user_endpoints_port = [x.split(":")[1] for x in user_endpoints.split(",")]
factory = "fleetrec.trainer.factory"
factory = "fleetrec.core.factory"
cmd = [sys.executable, "-u", "-m", factory, yaml]
for i in range(server_num):
......@@ -91,11 +91,11 @@ def start_procs(args, yaml):
procs[i].terminate()
print("all parameter server are killed", file=sys.stderr)
class Launch():
class Launch:
def __init__(self, envs, trainer):
self.envs = envs
self.trainer = trainer
def run(self):
start_procs(self.envs, self.trainer)
trainer: "LocalClusterTraining"
pserver_num: 2
trainer_num: 2
start_port: 36001
log_dirname: "logs"
strategy:
mode: "async"
trainer: "MPIClusterTraining"
pserver_num: 2
trainer_num: 2
start_port: 36001
log_dirname: "logs"
strategy:
mode: "async"
trainer: "UserDefineTrainer"
location: "/root/FleetRec/fleetrec/examples/user_define_trainer.py"
......@@ -15,9 +15,10 @@
train:
threads: 12
epochs: 10
trainer: "SingleTraining"
trainer: "single_training.yaml"
reader:
mode: "dataset"
batch_size: 2
class: "fleetrec.models.ctr_dnn.data_generator"
train_data_path: "/root/FleetRec/fleetrec/models/ctr_dnn/data/train/"
......@@ -29,7 +30,7 @@ train:
sparse_feature_number: 1000001
sparse_feature_dim: 8
dense_input_dim: 13
fc_sizes: [512, 256, 128, 32]
fc_sizes: [512, 256, 128, 32]
learning_rate: 0.001
save:
......@@ -40,6 +41,8 @@ train:
inference:
dirname: "models_for_inference"
epoch_interval: 4
feed_varnames: ["C1", "C2", "C3"]
fetch_varnames: "predict"
save_last: True
evaluate:
......
......@@ -14,7 +14,7 @@
import os
from fleetrec.trainer.factory import TrainerFactory
from fleetrec.core.factory import TrainerFactory
if __name__ == "__main__":
......
trainer: "SingleTraining"
\ No newline at end of file
......@@ -21,6 +21,7 @@ from fleetrec.utils import envs
class UserDefineTrainer(TranspileTrainer):
def __init__(self, config=None):
TranspileTrainer.__init__(self, config)
print("this is a demo about how to use user define trainer in fleet-rec")
def processor_register(self):
self.regist_context_processor('uninit', self.instance)
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
train:
threads: 12
epochs: 10
trainer: "ClusterTraining"
container: "local"
pserver_num: 2
trainer_num: 2
start_port: 36001
log_dirname: "logs"
strategy:
mode: "async"
reader:
mode: "dataset"
batch_size: 2
pipe_command: "python /paddle/eleps/fleetrec/models/ctr_dnn/dataset.py"
train_data_path: "/paddle/eleps/fleetrec/models/ctr_dnn/data/train"
model:
models: "fleetrec.models.ctr_dnn.model"
hyper_parameters:
sparse_inputs_slots: 27
sparse_feature_number: 1000001
sparse_feature_dim: 8
dense_input_dim: 13
fc_sizes: [512, 256, 128, 32]
learning_rate: 0.001
save:
increment:
dirname: "models_for_increment"
epoch_interval: 2
save_last: True
inference:
dirname: "models_for_inference"
epoch_interval: 4
feed_varnames: ["C1", "C2", "C3"]
fetch_varnames: "predict"
save_last: True
evaluate:
batch_size: 32
train_thread_num: 12
reader: "reader.py"
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
train:
threads: 12
epochs: 10
trainer: "UserDefineTrainer"
location: "/root/FleetRec/fleetrec/examples/user_define_trainer.py"
reader:
batch_size: 2
class: "fleetrec.models.ctr_dnn.data_generator"
train_data_path: "/root/FleetRec/fleetrec/models/ctr_dnn/data/train/"
model:
models: "fleetrec.models.ctr_dnn.model"
hyper_parameters:
sparse_inputs_slots: 27
sparse_feature_number: 1000001
sparse_feature_dim: 8
dense_input_dim: 13
fc_sizes: [512, 256, 128, 32]
learning_rate: 0.001
save:
increment:
dirname: "models_for_increment"
epoch_interval: 2
save_last: True
inference:
dirname: "models_for_inference"
epoch_interval: 4
save_last: True
evaluate:
batch_size: 32
train_thread_num: 12
reader: "reader.py"
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册