未验证 提交 74ca73b8 编写于 作者: D daminglu 提交者: GitHub

Update trainer api (#10674)

上级 6af0593c
......@@ -13,29 +13,35 @@
# limitations under the License.
import core
import framework
import executor
import framework
import io
import unique_name
from trainer import check_and_get_place
__all__ = ['Inferencer', ]
class Inferencer(object):
def __init__(self, param_path, place=None):
def __init__(self, infer_func, param_path, place=None):
"""
:param param_path: the path where the inference model is saved by fluid.io.save_inference_model
:param infer_func: a function that will return predict Variable
:param param_path: the path where the inference model is saved by fluid.io.save_params
:param place: place to do the inference
"""
self.param_path = param_path
self.scope = core.Scope()
self.inference_program = framework.Program()
with framework.program_guard(self.inference_program):
with unique_name.guard():
self.predict_var = infer_func()
self.exe = executor.Executor(check_and_get_place(place))
with executor.scope_guard(self.scope):
# load params from param_path into scope
[self.inference_program, _,
self.fetch_targets] = io.load_inference_model(
executor=self.exe, dirname=param_path)
io.load_params(self.exe, param_path, self.inference_program)
def infer(self, inputs, return_numpy=True):
"""
......@@ -51,7 +57,7 @@ class Inferencer(object):
with executor.scope_guard(self.scope):
results = self.exe.run(self.inference_program,
feed=inputs,
fetch_list=self.fetch_targets,
fetch_list=[self.predict_var],
return_numpy=return_numpy)
return results
......@@ -48,12 +48,11 @@ def linear():
return avg_loss
def train(use_cuda, save_dirname):
def train(use_cuda, train_program, save_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer(
train_func=linear,
infer_func=inference_program,
train_func=train_program,
place=place,
optimizer=fluid.optimizer.SGD(learning_rate=0.001))
......@@ -72,11 +71,7 @@ def train(use_cuda, save_dirname):
'''
if float(test_metrics[0]) < 20.0:
if save_dirname is not None:
# NOT clear yet
# fluid.io.save_inference_model(save_dirname, ['x'], [y_predict])
# trainer.save_params(save_dirname)
# https://github.com/PaddlePaddle/Paddle/pull/10445
trainer.save_inference_model(save_dirname)
trainer.save_params(save_dirname)
return
trainer.train(
......@@ -87,12 +82,13 @@ def train(use_cuda, save_dirname):
# infer
def infer(use_cuda, save_dirname=None):
def infer(use_cuda, inference_program, save_dirname=None):
if save_dirname is None:
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(param_path=save_dirname, place=place)
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path=save_dirname, place=place)
batch_size = 10
tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
......@@ -108,8 +104,8 @@ def main(use_cuda):
# Directory for saving the trained model
save_dirname = "fit_a_line.inference.model"
train(use_cuda, save_dirname)
infer(use_cuda, save_dirname)
train(use_cuda, linear, save_dirname)
infer(use_cuda, inference_program, save_dirname)
class TestFitALine(unittest.TestCase):
......
......@@ -53,48 +53,40 @@ def train_program():
predict = inference_program()
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost)
# acc = fluid.layers.accuracy(input=predict, label=label)
# return avg_cost, acc
return avg_cost
acc = fluid.layers.accuracy(input=predict, label=label)
return [avg_cost, acc]
def train(use_cuda, save_dirname):
def train(use_cuda, train_program, save_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
trainer = fluid.Trainer(
train_func=train_program,
infer_func=inference_program,
place=place,
optimizer=optimizer)
train_func=train_program, place=place, optimizer=optimizer)
def event_handler(event):
if isinstance(event, fluid.EndEpochEvent):
# if (event.epoch + 1) % 10 == 0:
# trainer.save_params(save_dirname)
trainer.save_inference_model(save_dirname)
# TODO: Uncomment this part once we are sure that .train is working
# test_reader = paddle.batch(
# paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
# test_metrics = trainer.test(reader=test_reader)
# avg_cost_set = test_metrics[0]
# acc_set = test_metrics[1]
#
# # get test acc and loss
# acc = numpy.array(acc_set).mean()
# avg_cost = numpy.array(avg_cost_set).mean()
#
# print("avg_cost: %s" % avg_cost)
# print("acc : %s" % acc)
#
# if float(acc) > 0.2: # Smaller value to increase CI speed
# trainer.save_params(save_dirname)
# else:
# print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
# event.epoch + 1, float(avg_cost), float(acc)))
# if math.isnan(float(avg_cost)):
# sys.exit("got NaN loss, training failed.")
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
test_metrics = trainer.test(
reader=test_reader, feed_order=['img', 'label'])
avg_cost_set = test_metrics[0]
acc_set = test_metrics[1]
# get test acc and loss
acc = numpy.array(acc_set).mean()
avg_cost = numpy.array(avg_cost_set).mean()
print("avg_cost: %s" % avg_cost)
print("acc : %s" % acc)
if float(acc) > 0.2: # Smaller value to increase CI speed
trainer.save_params(save_dirname)
else:
print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
event.epoch + 1, float(avg_cost), float(acc)))
if math.isnan(float(avg_cost)):
sys.exit("got NaN loss, training failed.")
train_reader = paddle.batch(
paddle.reader.shuffle(
......@@ -108,10 +100,11 @@ def train(use_cuda, save_dirname):
feed_order=['img', 'label'])
def infer(use_cuda, save_dirname=None):
def infer(use_cuda, inference_program, save_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(param_path=save_dirname, place=place)
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path=save_dirname, place=place)
batch_size = 1
tensor_img = numpy.random.uniform(-1.0, 1.0,
......@@ -126,8 +119,14 @@ def main(use_cuda):
save_dirname = "recognize_digits_conv.inference.model"
# call train() with is_local argument to run distributed train
train(use_cuda=use_cuda, save_dirname=save_dirname)
infer(use_cuda=use_cuda, save_dirname=save_dirname)
train(
use_cuda=use_cuda,
train_program=train_program,
save_dirname=save_dirname)
infer(
use_cuda=use_cuda,
inference_program=inference_program,
save_dirname=save_dirname)
if __name__ == '__main__':
......
......@@ -40,47 +40,40 @@ def train_program():
predict = inference_program()
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost)
# acc = fluid.layers.accuracy(input=predict, label=label)
# return avg_cost, acc
return avg_cost
acc = fluid.layers.accuracy(input=predict, label=label)
return [avg_cost, acc]
def train(use_cuda, save_dirname):
def train(use_cuda, train_program, save_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
trainer = fluid.Trainer(
train_func=train_program,
infer_func=inference_program,
place=place,
optimizer=optimizer)
train_func=train_program, place=place, optimizer=optimizer)
def event_handler(event):
if isinstance(event, fluid.EndEpochEvent):
# if (event.epoch + 1) % 10 == 0:
trainer.save_inference_model(save_dirname)
# TODO: Uncomment this part once we are sure that .train is working
# test_reader = paddle.batch(
# paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
# test_metrics = trainer.test(reader=test_reader)
# avg_cost_set = test_metrics[0]
# acc_set = test_metrics[1]
#
# # get test acc and loss
# acc = numpy.array(acc_set).mean()
# avg_cost = numpy.array(avg_cost_set).mean()
#
# print("avg_cost: %s" % avg_cost)
# print("acc : %s" % acc)
#
# if float(acc) > 0.2: # Smaller value to increase CI speed
# trainer.save_params(save_dirname)
# else:
# print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
# event.epoch + 1, float(avg_cost), float(acc)))
# if math.isnan(float(avg_cost)):
# sys.exit("got NaN loss, training failed.")
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
test_metrics = trainer.test(
reader=test_reader, feed_order=['img', 'label'])
avg_cost_set = test_metrics[0]
acc_set = test_metrics[1]
# get test acc and loss
acc = numpy.array(acc_set).mean()
avg_cost = numpy.array(avg_cost_set).mean()
print("avg_cost: %s" % avg_cost)
print("acc : %s" % acc)
if float(acc) > 0.2: # Smaller value to increase CI speed
trainer.save_params(save_dirname)
else:
print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
event.epoch + 1, float(avg_cost), float(acc)))
if math.isnan(float(avg_cost)):
sys.exit("got NaN loss, training failed.")
train_reader = paddle.batch(
paddle.reader.shuffle(
......@@ -94,10 +87,11 @@ def train(use_cuda, save_dirname):
feed_order=['img', 'label'])
def infer(use_cuda, save_dirname=None):
def infer(use_cuda, inference_program, save_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(param_path=save_dirname, place=place)
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path=save_dirname, place=place)
batch_size = 1
tensor_img = numpy.random.uniform(-1.0, 1.0,
......@@ -112,8 +106,14 @@ def main(use_cuda):
save_dirname = "recognize_digits_mlp.inference.model"
# call train() with is_local argument to run distributed train
train(use_cuda=use_cuda, save_dirname=save_dirname)
infer(use_cuda=use_cuda, save_dirname=save_dirname)
train(
use_cuda=use_cuda,
train_program=train_program,
save_dirname=save_dirname)
infer(
use_cuda=use_cuda,
inference_program=inference_program,
save_dirname=save_dirname)
if __name__ == '__main__':
......
......@@ -90,7 +90,7 @@ def train_program(is_sparse):
return avg_cost
def train(use_cuda, is_sparse, save_path):
def train(use_cuda, train_program, save_path):
train_reader = paddle.batch(
paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)
test_reader = paddle.batch(
......@@ -105,23 +105,21 @@ def train(use_cuda, is_sparse, save_path):
print("loss= ", avg_cost)
if avg_cost < 5.0:
trainer.save_inference_model(save_path)
trainer.save_params(save_path)
return
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
trainer = fluid.Trainer(
partial(train_program, is_sparse),
partial(inference_program, is_sparse),
fluid.optimizer.SGD(learning_rate=0.001),
place=place)
train_program, fluid.optimizer.SGD(learning_rate=0.001), place=place)
trainer.train(
reader=train_reader, num_epochs=1, event_handler=event_handler)
def infer(use_cuda, is_sparse, save_path):
def infer(use_cuda, inference_program, save_path):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(param_path=save_path, place=place)
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path=save_path, place=place)
lod = [0, 1]
first_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1)
......@@ -144,9 +142,9 @@ def main(use_cuda, is_sparse):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
save_path = "word2vec.inference.model"
train(use_cuda, is_sparse, save_path)
infer(use_cuda, is_sparse, save_path)
save_path = "word2vec.params"
train(use_cuda, partial(train_program, is_sparse), save_path)
infer(use_cuda, partial(inference_program, is_sparse), save_path)
if __name__ == '__main__':
......
......@@ -92,19 +92,13 @@ class Trainer(object):
place: The device place of this trainer.
"""
def __init__(self,
train_func,
infer_func,
optimizer,
param_path=None,
place=None):
def __init__(self, train_func, optimizer, param_path=None, place=None):
# 1. we need to generate a framework.Program by calling
# program_func. Reference: fluid.program_guard in
# test_word2vec.py
if not isinstance(optimizer, opt_module.Optimizer):
raise TypeError("The optimizer should be an instance of Optimizer")
self.infer_func = infer_func
self.scope = core.Scope()
self.startup_program = framework.Program()
......@@ -226,15 +220,6 @@ class Trainer(object):
exe = executor.Executor(self.place)
io.save_persistables(exe, dirname=param_path)
def save_inference_model(self, model_path):
inference_program = framework.Program()
with framework.program_guard(inference_program):
with unique_name.guard():
predict_var = self.infer_func()
predict_var = self.train_program.block(0).var(predict_var.name)
exe = executor.Executor(self.place)
io.save_inference_model(model_path, [], [predict_var], exe)
@contextlib.contextmanager
def _prog_and_scope_guard(self):
with framework.program_guard(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册