diff --git a/core/trainers/single_trainer.py b/core/trainers/single_trainer.py
index d410ef155bda6a043b66ddd1e2591389a322d172..c579bd377c02550f387644f5b4c7bda8f585bb44 100755
--- a/core/trainers/single_trainer.py
+++ b/core/trainers/single_trainer.py
@@ -18,6 +18,8 @@ Training use fluid with one node only.
 from __future__ import print_function
 
 import logging
+import time
+
 import paddle.fluid as fluid
 
 from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
@@ -104,14 +106,19 @@ class SingleTrainer(TranspileTrainer):
 
     def dataset_train(self, context):
         dataset = self._get_dataset("TRAIN")
-        epochs = envs.get_global_env("train.epochs")
+        ins = self._get_dataset_ins()
+        epochs = envs.get_global_env("train.epochs")
         for i in range(epochs):
+            begin_time = time.time()
             self._exe.train_from_dataset(program=fluid.default_main_program(),
                                          dataset=dataset,
                                          fetch_list=self.fetch_vars,
                                          fetch_info=self.fetch_alias,
                                          print_period=self.fetch_period)
+            end_time = time.time()
+            times = end_time-begin_time
+            print("epoch {} using time {}, speed {:.2f} lines/s".format(i, times, ins/times))
             self.save(i, "train", is_fleet=False)
+
         context['status'] = 'infer_pass'
 
diff --git a/core/trainers/transpiler_trainer.py b/core/trainers/transpiler_trainer.py
index 3dc8bfd6e04fb3da2b217f84126fac8101102945..6d765aa19d525e4744e801a371b4e7ee586e5d1d 100755
--- a/core/trainers/transpiler_trainer.py
+++ b/core/trainers/transpiler_trainer.py
@@ -70,6 +70,13 @@ class TranspileTrainer(Trainer):
             exit(0)
         return dataloader
 
+    def _get_dataset_ins(self):
+        count = 0
+        for f in self.files:
+            for _, _ in enumerate(open(f, 'r')):
+                count += 1
+        return count
+
     def _get_dataset(self, state="TRAIN"):
         if state == "TRAIN":
             inputs = self.model.get_inputs()
@@ -82,8 +89,7 @@ class TranspileTrainer(Trainer):
             train_data_path = envs.get_global_env(
                 "test_data_path", None, namespace)
 
-        #threads = int(envs.get_runtime_environ("train.trainer.threads"))
-        threads = 2
+        threads = int(envs.get_runtime_environ("train.trainer.threads"))
         batch_size = envs.get_global_env("batch_size", None, namespace)
         reader_class = envs.get_global_env("class", None, namespace)
         abs_dir = os.path.dirname(os.path.abspath(__file__))
@@ -106,8 +112,8 @@ class TranspileTrainer(Trainer):
             os.path.join(train_data_path, x)
             for x in os.listdir(train_data_path)
         ]
-
-        dataset.set_filelist(file_list)
+        self.files = file_list
+        dataset.set_filelist(self.files)
         debug_mode = envs.get_global_env("reader_debug_mode", False,
                                          namespace)
         if debug_mode:
diff --git a/example/cloud/config.yaml b/example/cloud/config.yaml
index ef79bfb5020762d4170d03f16df772a19f2a6897..8cec449bda487c47cba064773a70a0117ff8037b 100755
--- a/example/cloud/config.yaml
+++ b/example/cloud/config.yaml
@@ -1,17 +1,3 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 train:
   trainer:
     # for cluster training
@@ -21,7 +7,7 @@ train:
     workspace: "paddlerec.models.rank.dnn"
 
   reader:
-    batch_size: 2
+    batch_size: 512
     class: "{workspace}/../criteo_reader.py"
     train_data_path: "train_data"
     reader_debug_mode: False
@@ -31,10 +17,10 @@ train:
   hyper_parameters:
     sparse_inputs_slots: 27
     sparse_feature_number: 1000001
-    sparse_feature_dim: 9
+    sparse_feature_dim: 10
     dense_input_dim: 13
-    fc_sizes: [512, 256, 128, 32]
-    learning_rate: 0.001
+    fc_sizes: [400, 400, 400]
+    learning_rate: 0.0001
     optimizer: adam
 
   save: