diff --git a/python/paddle/fluid/trainer.py b/python/paddle/fluid/trainer.py index 2373cff222538f38128c7a506f7775cefebbe60d..f2ae63b5118cacfa2836582fb3d2d262a6434fe4 100644 --- a/python/paddle/fluid/trainer.py +++ b/python/paddle/fluid/trainer.py @@ -151,11 +151,62 @@ def check_and_get_place(place): class Trainer(object): """ + A trainer wraps MultiGPU/MultiNode training loops and can be used to train a + simple neural network easily. + + This API takes a :code:`train_func`. A :code:`train_func` is a function that + return loss as it first return value. The reset value can be fetched by + EndStepEvent.metrics + + This API also takes a :code:`optimizer_func` that will return an optimizer + instance. + + For example, to train a MLP for MNIST dataset, the sample program is + + >>> import paddle.fluid as fluid + >>> + >>> def mlp(image, layer_sizes=[200, 100], activation="relu", num_classes=10): + >>> hidden = image + >>> for layer_size in layer_sizes: + >>> hidden = fluid.layers.fc(input=hidden, size=layer_size, act=activation) + >>> return fluid.layers.fc(input=hidden, size=num_classes, act="softmax") + >>> + >>> def train_mnist_mlp(): + >>> img = fluid.layers.data(name='image', shape=[784]) + >>> label = fluid.layers.data(name='label', shape=[1], dtype='int64') + >>> prediction = mlp(img) + >>> return fluid.layers.mean(fluid.layers.cross_entropy(prediction, label)) + >>> + >>> def optimizer(): + >>> return fluid.optimizer.Adam() + >>> + >>> trainer = Trainer(train_func=train_mnist_mlp, + >>> optimizer_func=optimizer, + >>> place=fluid.CUDAPlace(0), + >>> parallel=True) + >>> + >>> def train_callback(event): + >>> if isinstance(event, fluid.EndStepEvent): + >>> print "Epoch ID", event.epoch, "Step ID",\ + >>> event.step, "AvgLoss", event.metrics[0] + >>> elif isinstance(event, fluid.EndEpochEvent): + >>> trainer.save_params("./model_{0}".format(event.epoch)) + >>> + >>> trainer.train(num_epochs=100, event_handler=train_callback) + + For more example, please see :ref:`api_guide_high_level_api`. + Args: - train_func(callable): A function which will return loss. The loss must be a scalar. + train_func(callable): A function which will return loss. The loss must be + a scalar tensor. optimizer_func(callable): A function that returns an Optimizer object. - place: The device place of this trainer. + place(CUDAPlace|CPUPlace): The device place of this trainer. If + :code:`parallel=True,` all CUDA Places will be used if :code:`place` + is a :code:`CUDAPlace`. + parallel(bool): True if use multiple devices. + checkpoint_config(CheckpointConfig): Configuration about how to save + checkpoints. """ def __init__(self, @@ -167,9 +218,6 @@ class Trainer(object): checkpoint_config=None): self.__stop = False self.parallel = parallel - # 1. we need to generate a framework.Program by calling - # program_func. Reference: fluid.program_guard in - # test_word2vec.py # config for checkpoint # only chief worker will save variables @@ -183,6 +231,10 @@ class Trainer(object): self.scope = core.Scope() + # 1. we need to generate a framework.Program by calling + # program_func. Reference: fluid.program_guard in + # test_word2vec.py + self.startup_program = framework.Program() self.train_program = framework.Program() @@ -315,17 +367,18 @@ class Trainer(object): def train(self, num_epochs, event_handler, reader=None, feed_order=None): """ - Train the model. + Start the train loop to train the model. Args: num_epochs: The number of epoch. An epoch will process all data in reader event_handler: The event handler. A function with type (ev:Event)->void - reader: + reader: A reader creator object. See also + :ref:`api_guide_python_reader` . feed_order: Feeding order of reader. None will following the defining order in program Returns: - + None """ training_role = os.getenv("PADDLE_TRAINING_ROLE", "") if training_role == "PSERVER": @@ -354,7 +407,14 @@ class Trainer(object): self.train_func_outputs) def save_params(self, param_path): - # reference: save_persistables in io.py + """ + Save all parameters into :code:`param_path` + Args: + param_path(str): The path to save parameters + + Returns: + None + """ with self._prog_and_scope_guard(): exe = executor.Executor(self.place) io.save_persistables(exe, dirname=param_path)