From 70d1dc5722a577e5d8bc1dbfc8bb379c3eab9e40 Mon Sep 17 00:00:00 2001 From: tangwei Date: Thu, 14 May 2020 14:28:54 +0800 Subject: [PATCH] add paddle cloud run --- core/trainers/single_trainer.py | 9 ++++++++- core/trainers/transpiler_trainer.py | 14 ++++++++++---- example/cloud/config.yaml | 22 ++++------------------ 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/core/trainers/single_trainer.py b/core/trainers/single_trainer.py index d410ef15..c579bd37 100755 --- a/core/trainers/single_trainer.py +++ b/core/trainers/single_trainer.py @@ -18,6 +18,8 @@ Training use fluid with one node only. from __future__ import print_function import logging +import time + import paddle.fluid as fluid from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer @@ -104,14 +106,19 @@ class SingleTrainer(TranspileTrainer): def dataset_train(self, context): dataset = self._get_dataset("TRAIN") - epochs = envs.get_global_env("train.epochs") + ins = self._get_dataset_ins() + epochs = envs.get_global_env("train.epochs") for i in range(epochs): + begin_time = time.time() self._exe.train_from_dataset(program=fluid.default_main_program(), dataset=dataset, fetch_list=self.fetch_vars, fetch_info=self.fetch_alias, print_period=self.fetch_period) + end_time = time.time() + times = end_time-begin_time + print("epoch {} using time {}, speed {:.2f} lines/s".format(i, times, ins/times)) self.save(i, "train", is_fleet=False) context['status'] = 'infer_pass' diff --git a/core/trainers/transpiler_trainer.py b/core/trainers/transpiler_trainer.py index 3dc8bfd6..6d765aa1 100755 --- a/core/trainers/transpiler_trainer.py +++ b/core/trainers/transpiler_trainer.py @@ -70,6 +70,13 @@ class TranspileTrainer(Trainer): exit(0) return dataloader + def _get_dataset_ins(self): + count = 0 + for f in self.files: + for _, _ in enumerate(open(f, 'r')): + count += 1 + return count + def _get_dataset(self, state="TRAIN"): if state == "TRAIN": inputs = self.model.get_inputs() @@ -82,8 +89,7 @@ class TranspileTrainer(Trainer): train_data_path = envs.get_global_env( "test_data_path", None, namespace) - #threads = int(envs.get_runtime_environ("train.trainer.threads")) - threads = 2 + threads = int(envs.get_runtime_environ("train.trainer.threads")) batch_size = envs.get_global_env("batch_size", None, namespace) reader_class = envs.get_global_env("class", None, namespace) abs_dir = os.path.dirname(os.path.abspath(__file__)) @@ -106,8 +112,8 @@ class TranspileTrainer(Trainer): os.path.join(train_data_path, x) for x in os.listdir(train_data_path) ] - - dataset.set_filelist(file_list) + self.files = file_list + dataset.set_filelist(self.files) debug_mode = envs.get_global_env("reader_debug_mode", False, namespace) if debug_mode: diff --git a/example/cloud/config.yaml b/example/cloud/config.yaml index ef79bfb5..8cec449b 100755 --- a/example/cloud/config.yaml +++ b/example/cloud/config.yaml @@ -1,17 +1,3 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - train: trainer: # for cluster training @@ -21,7 +7,7 @@ train: workspace: "paddlerec.models.rank.dnn" reader: - batch_size: 2 + batch_size: 512 class: "{workspace}/../criteo_reader.py" train_data_path: "train_data" reader_debug_mode: False @@ -31,10 +17,10 @@ train: hyper_parameters: sparse_inputs_slots: 27 sparse_feature_number: 1000001 - sparse_feature_dim: 9 + sparse_feature_dim: 10 dense_input_dim: 13 - fc_sizes: [512, 256, 128, 32] - learning_rate: 0.001 + fc_sizes: [400, 400, 400] + learning_rate: 0.0001 optimizer: adam save: -- GitLab