From 70d1dc5722a577e5d8bc1dbfc8bb379c3eab9e40 Mon Sep 17 00:00:00 2001
From: tangwei <tangwei12@baidu.com>
Date: Thu, 14 May 2020 14:28:54 +0800
Subject: [PATCH] add paddle cloud run

---
 core/trainers/single_trainer.py     |  9 ++++++++-
 core/trainers/transpiler_trainer.py | 14 ++++++++++----
 example/cloud/config.yaml           | 22 ++++------------------
 3 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/core/trainers/single_trainer.py b/core/trainers/single_trainer.py
index d410ef15..c579bd37 100755
--- a/core/trainers/single_trainer.py
+++ b/core/trainers/single_trainer.py
@@ -18,6 +18,8 @@ Training use fluid with one node only.
 
 from __future__ import print_function
 import logging
+import time
+
 import paddle.fluid as fluid
 
 from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
@@ -104,14 +106,19 @@ class SingleTrainer(TranspileTrainer):
 
     def dataset_train(self, context):
         dataset = self._get_dataset("TRAIN")
-        epochs = envs.get_global_env("train.epochs")
+        ins = self._get_dataset_ins()  # total input lines, for throughput
 
+        epochs = envs.get_global_env("train.epochs")
         for i in range(epochs):
+            begin_time = time.time()
             self._exe.train_from_dataset(program=fluid.default_main_program(),
                                          dataset=dataset,
                                          fetch_list=self.fetch_vars,
                                          fetch_info=self.fetch_alias,
                                          print_period=self.fetch_period)
+            times = time.time() - begin_time
+            speed = ins / times if times > 0 else 0.0  # guard zero interval
+            print("epoch {} using time {}, speed {:.2f} lines/s".format(i, times, speed))
 
             self.save(i, "train", is_fleet=False)
         context['status'] = 'infer_pass'
diff --git a/core/trainers/transpiler_trainer.py b/core/trainers/transpiler_trainer.py
index 3dc8bfd6..6d765aa1 100755
--- a/core/trainers/transpiler_trainer.py
+++ b/core/trainers/transpiler_trainer.py
@@ -70,6 +70,13 @@ class TranspileTrainer(Trainer):
             exit(0)
         return dataloader
 
+    def _get_dataset_ins(self):
+        """Return the total number of lines across all paths in self.files."""
+        def _line_count(path):
+            with open(path, 'r') as fin:  # closed on exit, unlike bare open()
+                return sum(1 for _ in fin)
+        return sum(_line_count(path) for path in self.files)
+
     def _get_dataset(self, state="TRAIN"):
         if state == "TRAIN":
             inputs = self.model.get_inputs()
@@ -82,8 +89,7 @@ class TranspileTrainer(Trainer):
             train_data_path = envs.get_global_env(
                 "test_data_path", None, namespace)
 
-        #threads = int(envs.get_runtime_environ("train.trainer.threads"))
-        threads = 2
+        threads = int(envs.get_runtime_environ("train.trainer.threads"))
         batch_size = envs.get_global_env("batch_size", None, namespace)
         reader_class = envs.get_global_env("class", None, namespace)
         abs_dir = os.path.dirname(os.path.abspath(__file__))
@@ -106,8 +112,8 @@ class TranspileTrainer(Trainer):
             os.path.join(train_data_path, x)
             for x in os.listdir(train_data_path)
         ]
-
-        dataset.set_filelist(file_list)
+        self.files = file_list
+        dataset.set_filelist(self.files)
 
         debug_mode = envs.get_global_env("reader_debug_mode", False, namespace)
         if debug_mode:
diff --git a/example/cloud/config.yaml b/example/cloud/config.yaml
index ef79bfb5..8cec449b 100755
--- a/example/cloud/config.yaml
+++ b/example/cloud/config.yaml
@@ -1,17 +1,3 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 train:
   trainer:
     # for cluster training
@@ -21,7 +7,7 @@ train:
   workspace: "paddlerec.models.rank.dnn"
 
   reader:
-    batch_size: 2
+    batch_size: 512
     class: "{workspace}/../criteo_reader.py"
     train_data_path: "train_data"
     reader_debug_mode: False
@@ -31,10 +17,10 @@ train:
     hyper_parameters:
       sparse_inputs_slots: 27
       sparse_feature_number: 1000001
-      sparse_feature_dim: 9
+      sparse_feature_dim: 10
       dense_input_dim: 13
-      fc_sizes: [512, 256, 128, 32]
-      learning_rate: 0.001
+      fc_sizes: [400, 400, 400]
+      learning_rate: 0.0001
       optimizer: adam
 
   save:
-- 
GitLab