提交 70d1dc57 编写于 作者: T tangwei

Add support for running on PaddleCloud

上级 5adee377
......@@ -18,6 +18,8 @@ Training use fluid with one node only.
from __future__ import print_function
import logging
import time
import paddle.fluid as fluid
from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
......@@ -104,14 +106,19 @@ class SingleTrainer(TranspileTrainer):
def dataset_train(self, context):
    """Train for the configured number of epochs using the dataset pipeline.

    For every epoch this runs ``train_from_dataset`` on the default main
    program, prints the epoch's elapsed wall-clock time and throughput, and
    saves the model through ``self.save``. After the last epoch it sets
    ``context['status']`` to ``'infer_pass'``.

    Args:
        context: Mapping-like trainer context; only its ``'status'`` entry
            is written here.
    """
    dataset = self._get_dataset("TRAIN")
    # Total number of lines in the files registered by _get_dataset; used
    # only for the throughput figure printed after each epoch.
    ins = self._get_dataset_ins()
    epochs = envs.get_global_env("train.epochs")

    for i in range(epochs):
        begin_time = time.time()
        self._exe.train_from_dataset(
            program=fluid.default_main_program(),
            dataset=dataset,
            fetch_list=self.fetch_vars,
            fetch_info=self.fetch_alias,
            print_period=self.fetch_period)
        end_time = time.time()
        times = end_time - begin_time
        # time.time() follows the wall clock and has finite resolution, so
        # the difference may be zero (or negative after a clock adjustment).
        # Report a speed of 0 in that case instead of raising
        # ZeroDivisionError in the middle of training.
        speed = ins / times if times > 0 else 0.0
        print("epoch {} using time {}, speed {:.2f} lines/s".format(
            i, times, speed))
        self.save(i, "train", is_fleet=False)

    context['status'] = 'infer_pass'
......
......@@ -70,6 +70,13 @@ class TranspileTrainer(Trainer):
exit(0)
return dataloader
def _get_dataset_ins(self):
    """Return the total number of lines across all files in ``self.files``.

    Every path in ``self.files`` is opened in text mode and iterated line by
    line, so no file is read into memory as a whole.

    Returns:
        int: Sum of the line counts of all listed files; 0 when the list
        is empty.
    """
    count = 0
    for f in self.files:
        # The context manager closes each handle as soon as it has been
        # counted, so a long file list cannot exhaust file descriptors.
        with open(f, 'r') as reader:
            count += sum(1 for _ in reader)
    return count
def _get_dataset(self, state="TRAIN"):
if state == "TRAIN":
inputs = self.model.get_inputs()
......@@ -82,8 +89,7 @@ class TranspileTrainer(Trainer):
train_data_path = envs.get_global_env(
"test_data_path", None, namespace)
#threads = int(envs.get_runtime_environ("train.trainer.threads"))
threads = 2
threads = int(envs.get_runtime_environ("train.trainer.threads"))
batch_size = envs.get_global_env("batch_size", None, namespace)
reader_class = envs.get_global_env("class", None, namespace)
abs_dir = os.path.dirname(os.path.abspath(__file__))
......@@ -106,8 +112,8 @@ class TranspileTrainer(Trainer):
os.path.join(train_data_path, x)
for x in os.listdir(train_data_path)
]
dataset.set_filelist(file_list)
self.files = file_list
dataset.set_filelist(self.files)
debug_mode = envs.get_global_env("reader_debug_mode", False, namespace)
if debug_mode:
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
train:
trainer:
# for cluster training
......@@ -21,7 +7,7 @@ train:
workspace: "paddlerec.models.rank.dnn"
reader:
batch_size: 2
batch_size: 512
class: "{workspace}/../criteo_reader.py"
train_data_path: "train_data"
reader_debug_mode: False
......@@ -31,10 +17,10 @@ train:
hyper_parameters:
sparse_inputs_slots: 27
sparse_feature_number: 1000001
sparse_feature_dim: 9
sparse_feature_dim: 10
dense_input_dim: 13
fc_sizes: [512, 256, 128, 32]
learning_rate: 0.001
fc_sizes: [400, 400, 400]
learning_rate: 0.0001
optimizer: adam
save:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册