diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 69bbef77f85bd25674235fbe0a54e9a70d43e714..21470bbef4dd7e651ea58071bf5b5abf9eacb32a 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -1251,6 +1251,33 @@ class AdamaxOptimizer(Optimizer):
     However, it is added here for numerical stability to prevent the
     division by 0 error.
 
+    Examples:
+        .. code-block:: python
+
+          import paddle.fluid as fluid
+          import numpy
+
+          # First create the Executor.
+          place = fluid.CPUPlace()  # fluid.CUDAPlace(0)
+          exe = fluid.Executor(place)
+
+          train_program = fluid.Program()
+          startup_program = fluid.Program()
+          with fluid.program_guard(train_program, startup_program):
+              data = fluid.layers.data(name='X', shape=[1], dtype='float32')
+              hidden = fluid.layers.fc(input=data, size=10)
+              loss = fluid.layers.mean(hidden)
+              adam = fluid.optimizer.Adamax(learning_rate=0.2)
+              adam.minimize(loss)
+
+          # Run the startup program once and only once.
+          exe.run(startup_program)
+
+          x = numpy.random.random(size=(10, 1)).astype('float32')
+          outs = exe.run(program=train_program,
+                         feed={'X': x},
+                         fetch_list=[loss.name])
+
     Args:
         learning_rate (float|Variable): the learning rate used to update parameters. \
             Can be a float value or a Variable with one float value as data element.
@@ -1261,12 +1288,6 @@ class AdamaxOptimizer(Optimizer):
             fluid.regularizer.L2DecayRegularizer.
         name: A optional name prefix.
 
-    Examples:
-        .. code-block:: python
-
-          optimizer = fluid.optimizer.Adamax(learning_rate=0.2)
-          optimizer.minimize(cost)
-
     Notes:
         Currently, AdamaxOptimizer doesn't support sparse parameter optimization.
     """
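
Reviewer note: the docstring above says epsilon is "added here for numerical stability to prevent the division by 0 error". For context while reviewing, below is a minimal NumPy sketch of the Adamax update rule showing where that epsilon enters the denominator. The helper name adamax_step and its default hyperparameters are illustrative assumptions for this note, not part of the patch or of Paddle's API.

    import numpy as np

    def adamax_step(param, grad, moment, inf_norm, t,
                    learning_rate=0.2, beta1=0.9, beta2=0.999, epsilon=1e-8):
        """One Adamax update; t is the 1-based step count."""
        # First-moment estimate: exponential moving average of gradients.
        moment = beta1 * moment + (1 - beta1) * grad
        # Infinity-norm estimate; this can be exactly zero early in training.
        inf_norm = np.maximum(beta2 * inf_norm, np.abs(grad))
        # Bias-correct the learning rate for the first moment.
        lr_t = learning_rate / (1 - beta1 ** t)
        # epsilon keeps the division well-defined when inf_norm is 0.
        param = param - lr_t * moment / (inf_norm + epsilon)
        return param, moment, inf_norm

This mirrors the Adamax update from the Adam paper (https://arxiv.org/abs/1412.6980) that the class implements; the epsilon in the denominator is the implementation-specific addition the docstring's note refers to, since the paper's update divides by the infinity-norm estimate directly.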