未验证 提交 74ca73b8 编写于 作者: D daminglu 提交者: GitHub

Update trainer api (#10674)

上级 6af0593c
...@@ -13,29 +13,35 @@ ...@@ -13,29 +13,35 @@
# limitations under the License. # limitations under the License.
import core import core
import framework
import executor import executor
import framework
import io import io
import unique_name
from trainer import check_and_get_place from trainer import check_and_get_place
__all__ = ['Inferencer', ] __all__ = ['Inferencer', ]
class Inferencer(object): class Inferencer(object):
def __init__(self, param_path, place=None): def __init__(self, infer_func, param_path, place=None):
""" """
:param param_path: the path where the inference model is saved by fluid.io.save_inference_model :param infer_func: a function that returns the prediction Variable
:param param_path: the path where the inference model is saved by fluid.io.save_params
:param place: place to do the inference :param place: place to do the inference
""" """
self.param_path = param_path self.param_path = param_path
self.scope = core.Scope() self.scope = core.Scope()
self.inference_program = framework.Program()
with framework.program_guard(self.inference_program):
with unique_name.guard():
self.predict_var = infer_func()
self.exe = executor.Executor(check_and_get_place(place)) self.exe = executor.Executor(check_and_get_place(place))
with executor.scope_guard(self.scope): with executor.scope_guard(self.scope):
# load params from param_path into scope # load params from param_path into scope
[self.inference_program, _, io.load_params(self.exe, param_path, self.inference_program)
self.fetch_targets] = io.load_inference_model(
executor=self.exe, dirname=param_path)
def infer(self, inputs, return_numpy=True): def infer(self, inputs, return_numpy=True):
""" """
...@@ -51,7 +57,7 @@ class Inferencer(object): ...@@ -51,7 +57,7 @@ class Inferencer(object):
with executor.scope_guard(self.scope): with executor.scope_guard(self.scope):
results = self.exe.run(self.inference_program, results = self.exe.run(self.inference_program,
feed=inputs, feed=inputs,
fetch_list=self.fetch_targets, fetch_list=[self.predict_var],
return_numpy=return_numpy) return_numpy=return_numpy)
return results return results
...@@ -48,12 +48,11 @@ def linear(): ...@@ -48,12 +48,11 @@ def linear():
return avg_loss return avg_loss
def train(use_cuda, save_dirname): def train(use_cuda, train_program, save_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer( trainer = fluid.Trainer(
train_func=linear, train_func=train_program,
infer_func=inference_program,
place=place, place=place,
optimizer=fluid.optimizer.SGD(learning_rate=0.001)) optimizer=fluid.optimizer.SGD(learning_rate=0.001))
...@@ -72,11 +71,7 @@ def train(use_cuda, save_dirname): ...@@ -72,11 +71,7 @@ def train(use_cuda, save_dirname):
''' '''
if float(test_metrics[0]) < 20.0: if float(test_metrics[0]) < 20.0:
if save_dirname is not None: if save_dirname is not None:
# NOT clear yet trainer.save_params(save_dirname)
# fluid.io.save_inference_model(save_dirname, ['x'], [y_predict])
# trainer.save_params(save_dirname)
# https://github.com/PaddlePaddle/Paddle/pull/10445
trainer.save_inference_model(save_dirname)
return return
trainer.train( trainer.train(
...@@ -87,12 +82,13 @@ def train(use_cuda, save_dirname): ...@@ -87,12 +82,13 @@ def train(use_cuda, save_dirname):
# infer # infer
def infer(use_cuda, save_dirname=None): def infer(use_cuda, inference_program, save_dirname=None):
if save_dirname is None: if save_dirname is None:
return return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(param_path=save_dirname, place=place) inferencer = fluid.Inferencer(
infer_func=inference_program, param_path=save_dirname, place=place)
batch_size = 10 batch_size = 10
tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32") tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
...@@ -108,8 +104,8 @@ def main(use_cuda): ...@@ -108,8 +104,8 @@ def main(use_cuda):
# Directory for saving the trained model # Directory for saving the trained model
save_dirname = "fit_a_line.inference.model" save_dirname = "fit_a_line.inference.model"
train(use_cuda, save_dirname) train(use_cuda, linear, save_dirname)
infer(use_cuda, save_dirname) infer(use_cuda, inference_program, save_dirname)
class TestFitALine(unittest.TestCase): class TestFitALine(unittest.TestCase):
......
...@@ -53,48 +53,40 @@ def train_program(): ...@@ -53,48 +53,40 @@ def train_program():
predict = inference_program() predict = inference_program()
cost = fluid.layers.cross_entropy(input=predict, label=label) cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost) avg_cost = fluid.layers.mean(cost)
# acc = fluid.layers.accuracy(input=predict, label=label) acc = fluid.layers.accuracy(input=predict, label=label)
# return avg_cost, acc return [avg_cost, acc]
return avg_cost
def train(use_cuda, save_dirname): def train(use_cuda, train_program, save_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adam(learning_rate=0.001) optimizer = fluid.optimizer.Adam(learning_rate=0.001)
trainer = fluid.Trainer( trainer = fluid.Trainer(
train_func=train_program, train_func=train_program, place=place, optimizer=optimizer)
infer_func=inference_program,
place=place,
optimizer=optimizer)
def event_handler(event): def event_handler(event):
if isinstance(event, fluid.EndEpochEvent): if isinstance(event, fluid.EndEpochEvent):
# if (event.epoch + 1) % 10 == 0: test_reader = paddle.batch(
# trainer.save_params(save_dirname) paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
trainer.save_inference_model(save_dirname) test_metrics = trainer.test(
reader=test_reader, feed_order=['img', 'label'])
# TODO: Uncomment this part once we are sure that .train is working avg_cost_set = test_metrics[0]
# test_reader = paddle.batch( acc_set = test_metrics[1]
# paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
# test_metrics = trainer.test(reader=test_reader) # get test acc and loss
# avg_cost_set = test_metrics[0] acc = numpy.array(acc_set).mean()
# acc_set = test_metrics[1] avg_cost = numpy.array(avg_cost_set).mean()
#
# # get test acc and loss print("avg_cost: %s" % avg_cost)
# acc = numpy.array(acc_set).mean() print("acc : %s" % acc)
# avg_cost = numpy.array(avg_cost_set).mean()
# if float(acc) > 0.2: # Smaller value to increase CI speed
# print("avg_cost: %s" % avg_cost) trainer.save_params(save_dirname)
# print("acc : %s" % acc) else:
# print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
# if float(acc) > 0.2: # Smaller value to increase CI speed event.epoch + 1, float(avg_cost), float(acc)))
# trainer.save_params(save_dirname) if math.isnan(float(avg_cost)):
# else: sys.exit("got NaN loss, training failed.")
# print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
# event.epoch + 1, float(avg_cost), float(acc)))
# if math.isnan(float(avg_cost)):
# sys.exit("got NaN loss, training failed.")
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
...@@ -108,10 +100,11 @@ def train(use_cuda, save_dirname): ...@@ -108,10 +100,11 @@ def train(use_cuda, save_dirname):
feed_order=['img', 'label']) feed_order=['img', 'label'])
def infer(use_cuda, save_dirname=None): def infer(use_cuda, inference_program, save_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(param_path=save_dirname, place=place) inferencer = fluid.Inferencer(
infer_func=inference_program, param_path=save_dirname, place=place)
batch_size = 1 batch_size = 1
tensor_img = numpy.random.uniform(-1.0, 1.0, tensor_img = numpy.random.uniform(-1.0, 1.0,
...@@ -126,8 +119,14 @@ def main(use_cuda): ...@@ -126,8 +119,14 @@ def main(use_cuda):
save_dirname = "recognize_digits_conv.inference.model" save_dirname = "recognize_digits_conv.inference.model"
# call train() with is_local argument to run distributed training # call train() with is_local argument to run distributed training
train(use_cuda=use_cuda, save_dirname=save_dirname) train(
infer(use_cuda=use_cuda, save_dirname=save_dirname) use_cuda=use_cuda,
train_program=train_program,
save_dirname=save_dirname)
infer(
use_cuda=use_cuda,
inference_program=inference_program,
save_dirname=save_dirname)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -40,47 +40,40 @@ def train_program(): ...@@ -40,47 +40,40 @@ def train_program():
predict = inference_program() predict = inference_program()
cost = fluid.layers.cross_entropy(input=predict, label=label) cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost) avg_cost = fluid.layers.mean(cost)
# acc = fluid.layers.accuracy(input=predict, label=label) acc = fluid.layers.accuracy(input=predict, label=label)
# return avg_cost, acc return [avg_cost, acc]
return avg_cost
def train(use_cuda, save_dirname): def train(use_cuda, train_program, save_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adam(learning_rate=0.001) optimizer = fluid.optimizer.Adam(learning_rate=0.001)
trainer = fluid.Trainer( trainer = fluid.Trainer(
train_func=train_program, train_func=train_program, place=place, optimizer=optimizer)
infer_func=inference_program,
place=place,
optimizer=optimizer)
def event_handler(event): def event_handler(event):
if isinstance(event, fluid.EndEpochEvent): if isinstance(event, fluid.EndEpochEvent):
# if (event.epoch + 1) % 10 == 0: test_reader = paddle.batch(
trainer.save_inference_model(save_dirname) paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
test_metrics = trainer.test(
# TODO: Uncomment this part once we are sure that .train is working reader=test_reader, feed_order=['img', 'label'])
# test_reader = paddle.batch( avg_cost_set = test_metrics[0]
# paddle.dataset.mnist.test(), batch_size=BATCH_SIZE) acc_set = test_metrics[1]
# test_metrics = trainer.test(reader=test_reader)
# avg_cost_set = test_metrics[0] # get test acc and loss
# acc_set = test_metrics[1] acc = numpy.array(acc_set).mean()
# avg_cost = numpy.array(avg_cost_set).mean()
# # get test acc and loss
# acc = numpy.array(acc_set).mean() print("avg_cost: %s" % avg_cost)
# avg_cost = numpy.array(avg_cost_set).mean() print("acc : %s" % acc)
#
# print("avg_cost: %s" % avg_cost) if float(acc) > 0.2: # Smaller value to increase CI speed
# print("acc : %s" % acc) trainer.save_params(save_dirname)
# else:
# if float(acc) > 0.2: # Smaller value to increase CI speed print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
# trainer.save_params(save_dirname) event.epoch + 1, float(avg_cost), float(acc)))
# else: if math.isnan(float(avg_cost)):
# print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( sys.exit("got NaN loss, training failed.")
# event.epoch + 1, float(avg_cost), float(acc)))
# if math.isnan(float(avg_cost)):
# sys.exit("got NaN loss, training failed.")
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
...@@ -94,10 +87,11 @@ def train(use_cuda, save_dirname): ...@@ -94,10 +87,11 @@ def train(use_cuda, save_dirname):
feed_order=['img', 'label']) feed_order=['img', 'label'])
def infer(use_cuda, save_dirname=None): def infer(use_cuda, inference_program, save_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(param_path=save_dirname, place=place) inferencer = fluid.Inferencer(
infer_func=inference_program, param_path=save_dirname, place=place)
batch_size = 1 batch_size = 1
tensor_img = numpy.random.uniform(-1.0, 1.0, tensor_img = numpy.random.uniform(-1.0, 1.0,
...@@ -112,8 +106,14 @@ def main(use_cuda): ...@@ -112,8 +106,14 @@ def main(use_cuda):
save_dirname = "recognize_digits_mlp.inference.model" save_dirname = "recognize_digits_mlp.inference.model"
# call train() with is_local argument to run distributed training # call train() with is_local argument to run distributed training
train(use_cuda=use_cuda, save_dirname=save_dirname) train(
infer(use_cuda=use_cuda, save_dirname=save_dirname) use_cuda=use_cuda,
train_program=train_program,
save_dirname=save_dirname)
infer(
use_cuda=use_cuda,
inference_program=inference_program,
save_dirname=save_dirname)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -90,7 +90,7 @@ def train_program(is_sparse): ...@@ -90,7 +90,7 @@ def train_program(is_sparse):
return avg_cost return avg_cost
def train(use_cuda, is_sparse, save_path): def train(use_cuda, train_program, save_path):
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE) paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)
test_reader = paddle.batch( test_reader = paddle.batch(
...@@ -105,23 +105,21 @@ def train(use_cuda, is_sparse, save_path): ...@@ -105,23 +105,21 @@ def train(use_cuda, is_sparse, save_path):
print("loss= ", avg_cost) print("loss= ", avg_cost)
if avg_cost < 5.0: if avg_cost < 5.0:
trainer.save_inference_model(save_path) trainer.save_params(save_path)
return return
if math.isnan(avg_cost): if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.") sys.exit("got NaN loss, training failed.")
trainer = fluid.Trainer( trainer = fluid.Trainer(
partial(train_program, is_sparse), train_program, fluid.optimizer.SGD(learning_rate=0.001), place=place)
partial(inference_program, is_sparse),
fluid.optimizer.SGD(learning_rate=0.001),
place=place)
trainer.train( trainer.train(
reader=train_reader, num_epochs=1, event_handler=event_handler) reader=train_reader, num_epochs=1, event_handler=event_handler)
def infer(use_cuda, is_sparse, save_path): def infer(use_cuda, inference_program, save_path):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(param_path=save_path, place=place) inferencer = fluid.Inferencer(
infer_func=inference_program, param_path=save_path, place=place)
lod = [0, 1] lod = [0, 1]
first_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1) first_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1)
...@@ -144,9 +142,9 @@ def main(use_cuda, is_sparse): ...@@ -144,9 +142,9 @@ def main(use_cuda, is_sparse):
if use_cuda and not fluid.core.is_compiled_with_cuda(): if use_cuda and not fluid.core.is_compiled_with_cuda():
return return
save_path = "word2vec.inference.model" save_path = "word2vec.params"
train(use_cuda, is_sparse, save_path) train(use_cuda, partial(train_program, is_sparse), save_path)
infer(use_cuda, is_sparse, save_path) infer(use_cuda, partial(inference_program, is_sparse), save_path)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -92,19 +92,13 @@ class Trainer(object): ...@@ -92,19 +92,13 @@ class Trainer(object):
place: The device place of this trainer. place: The device place of this trainer.
""" """
def __init__(self, def __init__(self, train_func, optimizer, param_path=None, place=None):
train_func,
infer_func,
optimizer,
param_path=None,
place=None):
# 1. we need to generate a framework.Program by calling # 1. we need to generate a framework.Program by calling
# program_func. Reference: fluid.program_guard in # program_func. Reference: fluid.program_guard in
# test_word2vec.py # test_word2vec.py
if not isinstance(optimizer, opt_module.Optimizer): if not isinstance(optimizer, opt_module.Optimizer):
raise TypeError("The optimizer should be an instance of Optimizer") raise TypeError("The optimizer should be an instance of Optimizer")
self.infer_func = infer_func
self.scope = core.Scope() self.scope = core.Scope()
self.startup_program = framework.Program() self.startup_program = framework.Program()
...@@ -226,15 +220,6 @@ class Trainer(object): ...@@ -226,15 +220,6 @@ class Trainer(object):
exe = executor.Executor(self.place) exe = executor.Executor(self.place)
io.save_persistables(exe, dirname=param_path) io.save_persistables(exe, dirname=param_path)
def save_inference_model(self, model_path):
inference_program = framework.Program()
with framework.program_guard(inference_program):
with unique_name.guard():
predict_var = self.infer_func()
predict_var = self.train_program.block(0).var(predict_var.name)
exe = executor.Executor(self.place)
io.save_inference_model(model_path, [], [predict_var], exe)
@contextlib.contextmanager @contextlib.contextmanager
def _prog_and_scope_guard(self): def _prog_and_scope_guard(self):
with framework.program_guard( with framework.program_guard(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册