From a6b2ae3953135836e561ae2903fd235a58b83ec3 Mon Sep 17 00:00:00 2001 From: qjing666 Date: Wed, 26 Feb 2020 16:15:35 +0800 Subject: [PATCH] add k8s deployment solution --- .../k8s_deployment/trainer/fl_trainer.py | 41 ------------------- .../k8s_deployment/trainer/run_trainer.sh | 14 ------- 2 files changed, 55 deletions(-) delete mode 100644 paddle_fl/examples/k8s_deployment/trainer/fl_trainer.py delete mode 100644 paddle_fl/examples/k8s_deployment/trainer/run_trainer.sh diff --git a/paddle_fl/examples/k8s_deployment/trainer/fl_trainer.py b/paddle_fl/examples/k8s_deployment/trainer/fl_trainer.py deleted file mode 100644 index ef19579..0000000 --- a/paddle_fl/examples/k8s_deployment/trainer/fl_trainer.py +++ /dev/null @@ -1,41 +0,0 @@ -from paddle_fl.core.trainer.fl_trainer import FLTrainerFactory -from paddle_fl.core.master.fl_job import FLRunTimeJob -import numpy as np -import sys -import logging -import time -logging.basicConfig(filename="test.log", filemode="w", format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%d-%M-%Y %H:%M:%S", level=logging.DEBUG) - - -def reader(): - for i in range(1000): - data_dict = {} - for i in range(3): - data_dict[str(i)] = np.random.rand(1, 5).astype('float32') - data_dict["label"] = np.random.randint(2, size=(1, 1)).astype('int64') - yield data_dict - -trainer_id = int(sys.argv[1]) # trainer id for each guest -job_path = "fl_job_config" -job = FLRunTimeJob() -job.load_trainer_job(job_path, trainer_id) -#job._scheduler_ep = "127.0.0.1:9091" # Inform the scheduler IP to trainer -job._scheduler_ep = os.environ['FL_SCHEDULER_SERVICE_HOST'] + ":" + os.environ['FL_SCHEDULER_SERVICE_PORT_FL_SCHEDULER'] -trainer = FLTrainerFactory().create_fl_trainer(job) -trainer._current_ep = "127.0.0.1:{}".format(9000+trainer_id) -trainer.start() -print(trainer._scheduler_ep, trainer._current_ep) -output_folder = "fl_model" -epoch_id = 0 -while not trainer.stop(): - print("batch %d start train" % (epoch_id)) - train_step = 0 - for data in reader(): - trainer.run(feed=data, fetch=[]) - train_step += 1 - if train_step == trainer._step: - break - epoch_id += 1 - if epoch_id % 5 == 0: - trainer.save_inference_program(output_folder) - diff --git a/paddle_fl/examples/k8s_deployment/trainer/run_trainer.sh b/paddle_fl/examples/k8s_deployment/trainer/run_trainer.sh deleted file mode 100644 index 88e27b1..0000000 --- a/paddle_fl/examples/k8s_deployment/trainer/run_trainer.sh +++ /dev/null @@ -1,14 +0,0 @@ -#Download config file -wget - -#Download image -sudo docker pull [paddle-fl image] - -#Build docker -sudo docker run --name paddlefl -it -v $PWD:/root [paddle-fl image] /bin/bash - -sudo docker cp /path/to/config paddlefl:/path/to/config/file/at/container - -#Run program - -python -u fl_trainer.py > trainer.log & -- GitLab