Unverified commit 8f534696, authored by chengduo, committed by GitHub

Polish Executor and Compiler doc (#17262)

* polish doc
test=develop

* update parallel executor doc
test=develop

* update API.spec
test=develop

* polish code
test=develop
Parent: dd86b400
@@ -15,12 +15,12 @@ paddle.fluid.cpu_places (ArgSpec(args=['device_count'], varargs=None, keywords=N
 paddle.fluid.cuda_pinned_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd0c3ebd813c39958c92b78e3eef7e912'))
 paddle.fluid.in_dygraph_mode (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'f06314a1cb30c96b5808dde2219c2dae'))
 paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f5369953dd0c443961cf79f7a00e1a03'))
+paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '3a584496aa1343f36eebf3c46b323a74'))
 paddle.fluid.Executor.infer_from_dataset (ArgSpec(args=['self', 'program', 'dataset', 'scope', 'thread', 'debug', 'fetch_list', 'fetch_info', 'print_period'], varargs=None, keywords=None, defaults=(None, None, None, 0, False, None, None, 100)), ('document', '9c7decb955b9c4f718114179c8985581'))
-paddle.fluid.Executor.run (ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False)), ('document', 'f482e93b38b4018796969a2e1dde479d'))
+paddle.fluid.Executor.run (ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False)), ('document', '4cfcd9c15b766a51b584cc46d38f1ad8'))
 paddle.fluid.Executor.train_from_dataset (ArgSpec(args=['self', 'program', 'dataset', 'scope', 'thread', 'debug', 'fetch_list', 'fetch_info', 'print_period'], varargs=None, keywords=None, defaults=(None, None, None, 0, False, None, None, 100)), ('document', 'd521011d79e71080fe9b5bb179b43518'))
-paddle.fluid.global_scope (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'e148d3ab1ed8edf3e928212a375959c0'))
-paddle.fluid.scope_guard (ArgSpec(args=['scope'], varargs=None, keywords=None, defaults=None), ('document', 'b94d1f6bcc29c4fb58fc0058561250c2'))
+paddle.fluid.global_scope (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'f65788d9ead293ada47551339df12203'))
+paddle.fluid.scope_guard (ArgSpec(args=['scope'], varargs=None, keywords=None, defaults=None), ('document', 'e480208ccc0c9abf084867206dab4d2c'))
 paddle.fluid.DistributeTranspiler.__init__ (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.DistributeTranspiler.get_pserver_program (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '292ab72977afbe58e6a3bde175452680'))
 paddle.fluid.DistributeTranspiler.get_pserver_programs (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '78f4949aedf317666a89ca74b3748ba8'))
@@ -31,7 +31,7 @@ paddle.fluid.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'pr
 paddle.fluid.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ac4114d3df16264f1946deb3a8434a6f'))
 paddle.fluid.DistributeTranspilerConfig.__init__
 paddle.fluid.ParallelExecutor.__init__ (ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.ParallelExecutor.run (ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', '2cb4bd74481861345c70228a0f57620c'))
+paddle.fluid.ParallelExecutor.run (ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', '33ce6ec50f8eeb05d340e6b114b026fd'))
 paddle.fluid.create_lod_tensor (ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None), ('document', 'b82ea20e2dc5ff2372e0643169ca47ff'))
 paddle.fluid.create_random_int_lodtensor (ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None), ('document', '74dc6d23185d90a7a50fbac19f5b65fb'))
 paddle.fluid.DataFeedDesc.__init__ (ArgSpec(args=['self', 'proto_file'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
@@ -40,7 +40,7 @@ paddle.fluid.DataFeedDesc.set_batch_size (ArgSpec(args=['self', 'batch_size'], v
 paddle.fluid.DataFeedDesc.set_dense_slots (ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None), ('document', 'eb894b464bbcd1b4bc8038398954f766'))
 paddle.fluid.DataFeedDesc.set_use_slots (ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None), ('document', '415c56600ce4e198c071cad01409a690'))
 paddle.fluid.CompiledProgram.__init__ (ArgSpec(args=['self', 'program_or_graph'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.CompiledProgram.with_data_parallel (ArgSpec(args=['self', 'loss_name', 'build_strategy', 'exec_strategy', 'share_vars_from', 'places'], varargs=None, keywords=None, defaults=(None, None, None, None, None)), ('document', 'a8c7793803cf976680d9478e378fa356'))
+paddle.fluid.CompiledProgram.with_data_parallel (ArgSpec(args=['self', 'loss_name', 'build_strategy', 'exec_strategy', 'share_vars_from', 'places'], varargs=None, keywords=None, defaults=(None, None, None, None, None)), ('document', '0e17773521634ef798fddd7d2ea3ef96'))
 paddle.fluid.CompiledProgram.with_inference_optimize (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=None), ('document', '9e5b009d850191a010e859189c127fd8'))
 paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.ExecutionStrategy) -> None
 paddle.fluid.BuildStrategy.GradientScaleStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy.GradientScaleStrategy, arg0: int) -> None
...
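The hex strings in the ('document', ...) field of each API.spec entry are content hashes of the corresponding docstring, which is why only those hashes change when docs are polished. Below is a minimal sketch of how such a hash could be recomputed, assuming the digest is an MD5 of the docstring text; the digest choice and the doc_hash helper are assumptions for illustration, not taken from this diff.

    .. code-block:: python

        # Assumption: API.spec's 'document' field looks like an MD5 hex digest
        # of the API's docstring, so polishing a docstring changes the hash.
        import hashlib
        import inspect

        def doc_hash(obj):
            # Hypothetical helper: hash the docstring (empty string if missing).
            doc = inspect.getdoc(obj) or ""
            return hashlib.md5(doc.encode("utf-8")).hexdigest()

        # Usage sketch:
        # import paddle.fluid as fluid
        # print(doc_hash(fluid.Executor.close))  # changes after this doc polish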
@@ -58,20 +58,34 @@ class CompiledProgram(object):
        optimizations, for example.
        * Pre-compute some logic once so that each run is faster.
        * Transform the program so that it can run in multiple devices.
-       * TODO: transform the program for optimized inference or distributed
-         training.
+       * Transform the program for optimized inference or distributed
+         training. **Note that: this part is not finished.**

     Example:
         .. code-block:: python

-            place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            exe.run(startup)
-            compiled_prog = compiler.CompiledProgram(main).with_data_parallel(
-                loss_name=loss.name)
-            for i in range(5):
-                test_loss, = exe.run(compiled_prog,
-                                     feed=feed_dict,
-                                     fetch_list=[loss.name])
+            import paddle.fluid as fluid
+            import paddle.fluid.compiler as compiler
+            import numpy
+            import os
+
+            place = fluid.CUDAPlace(0) # fluid.CPUPlace()
+            exe = fluid.Executor(place)
+
+            data = fluid.layers.data(name='X', shape=[1], dtype='float32')
+            hidden = fluid.layers.fc(input=data, size=10)
+            loss = fluid.layers.mean(hidden)
+            fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)
+
+            fluid.default_startup_program().random_seed=1
+            exe.run(fluid.default_startup_program())
+            compiled_prog = compiler.CompiledProgram(
+                     fluid.default_main_program())
+
+            x = numpy.random.random(size=(10, 1)).astype('float32')
+            loss_data, = exe.run(compiled_prog,
+                                 feed={"X": x},
+                                 fetch_list=[loss.name])

     Args:
         program_or_graph (Graph|Program): If it's Program, it will be first
@@ -108,6 +122,44 @@ class CompiledProgram(object):
                            places=None):
         """Configs the program to run in data parallel way.

+        Example:
+            .. code-block:: python
+
+                import paddle.fluid as fluid
+                import paddle.fluid.compiler as compiler
+                import numpy
+                import os
+
+                use_cuda = True
+                place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+
+                # NOTE: If you run the program on CPU, you need to specify
+                # CPU_NUM; otherwise, fluid will use all of the logical cores
+                # as CPU_NUM. In that case, the batch size of the input must be
+                # greater than CPU_NUM; if it is not, the process will fail
+                # with an exception.
+                if not use_cuda:
+                    os.environ['CPU_NUM'] = str(2)
+
+                exe = fluid.Executor(place)
+
+                data = fluid.layers.data(name='X', shape=[1], dtype='float32')
+                hidden = fluid.layers.fc(input=data, size=10)
+                loss = fluid.layers.mean(hidden)
+                fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)
+
+                fluid.default_startup_program().random_seed=1
+                exe.run(fluid.default_startup_program())
+                compiled_prog = compiler.CompiledProgram(
+                         fluid.default_main_program()).with_data_parallel(
+                                  loss_name=loss.name)
+
+                x = numpy.random.random(size=(10, 1)).astype('float32')
+                loss_data, = exe.run(compiled_prog,
+                                     feed={"X": x},
+                                     fetch_list=[loss.name])
+
         Args:
             loss_name (str): The loss name must set in training. Default None.
             build_strategy(BuildStrategy): build_strategy is used to
...
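The Args above also list build_strategy, exec_strategy, share_vars_from and places, which the new example does not exercise. A minimal sketch of passing explicit strategy objects follows, using the fluid.BuildStrategy and fluid.ExecutionStrategy classes listed in the API.spec above; the attribute names num_threads and memory_optimize are assumptions for illustration.

    .. code-block:: python

        import numpy
        import os
        import paddle.fluid as fluid
        import paddle.fluid.compiler as compiler

        os.environ['CPU_NUM'] = '2'      # keep batch size > CPU_NUM on CPU
        place = fluid.CPUPlace()         # or fluid.CUDAPlace(0)
        exe = fluid.Executor(place)

        data = fluid.layers.data(name='X', shape=[1], dtype='float32')
        loss = fluid.layers.mean(fluid.layers.fc(input=data, size=10))
        fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)
        exe.run(fluid.default_startup_program())

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_threads = 2          # assumed attribute name
        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = True  # assumed attribute name

        compiled_prog = compiler.CompiledProgram(
            fluid.default_main_program()).with_data_parallel(
                loss_name=loss.name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)

        x = numpy.random.random(size=(10, 1)).astype('float32')
        loss_data, = exe.run(compiled_prog, feed={'X': x}, fetch_list=[loss.name])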
@@ -38,6 +38,15 @@ def global_scope():
     Get the global/default scope instance. There are a lot of APIs use
     :code:`global_scope` as its default value, e.g., :code:`Executor.run`

+    Examples:
+        .. code-block:: python
+
+            import paddle.fluid as fluid
+            import numpy
+
+            fluid.global_scope().var("data").get_tensor().set(numpy.ones((2, 2)), fluid.CPUPlace())
+            numpy.array(fluid.global_scope().find_var("data").get_tensor())
+
     Returns:
         Scope: The global/default scope instance.
     """
@@ -58,10 +67,15 @@ def scope_guard(scope):
     variable in runtime will assigned to the new scope.

     Examples:
-        >>> import paddle.fluid as fluid
-        >>> new_scope = fluid.Scope()
-        >>> with fluid.scope_guard(new_scope):
-        >>>     ...
+        .. code-block:: python
+
+            import paddle.fluid as fluid
+            import numpy
+
+            new_scope = fluid.Scope()
+            with fluid.scope_guard(new_scope):
+                fluid.global_scope().var("data").get_tensor().set(numpy.ones((2, 2)), fluid.CPUPlace())
+            numpy.array(new_scope.find_var("data").get_tensor())

     Args:
         scope: The new global/default scope.
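Since Executor.run also accepts a scope argument (see the ArgSpec near the top of this diff), a custom scope can hold one run's variables instead of the global scope. A minimal sketch under that assumption; the toy network is the same one used elsewhere in this commit.

    .. code-block:: python

        import numpy
        import paddle.fluid as fluid

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)

        data = fluid.layers.data(name='X', shape=[1], dtype='float32')
        loss = fluid.layers.mean(fluid.layers.fc(input=data, size=10))

        new_scope = fluid.Scope()
        with fluid.scope_guard(new_scope):
            exe.run(fluid.default_startup_program())
            x = numpy.random.random(size=(10, 1)).astype('float32')
            # Passing scope= explicitly is equivalent to relying on scope_guard here.
            loss_data, = exe.run(fluid.default_main_program(),
                                 feed={'X': x},
                                 fetch_list=[loss.name],
                                 scope=new_scope)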
@@ -75,11 +89,18 @@ def as_numpy(tensor):
     """
     Convert a Tensor to a numpy.ndarray, its only support Tensor without LoD information.
     For higher dimensional sequence data, please use LoDTensor directly.

     Examples:
-        >>> import paddle.fluid as fluid
-        >>> outs = executor.run(...)
-        >>> np_outs = map(lambda x: as_numpy(x), outs)
-        >>> ...
+        .. code-block:: python
+
+            import paddle.fluid as fluid
+            import numpy
+
+            new_scope = fluid.Scope()
+            with fluid.scope_guard(new_scope):
+                fluid.global_scope().var("data").get_tensor().set(numpy.ones((2, 2)), fluid.CPUPlace())
+            tensor = new_scope.find_var("data").get_tensor()
+            fluid.executor.as_numpy(tensor) # or numpy.array(new_scope.find_var("data").get_tensor())

     Args:
         tensor(Variable): a instance of Tensor
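as_numpy is documented above to support only tensors without LoD information. A short hedged sketch of guarding the conversion accordingly; checking lod() before converting is an assumption about the intended usage, not something this diff prescribes.

    .. code-block:: python

        import numpy
        import paddle.fluid as fluid

        t = fluid.LoDTensor()
        t.set(numpy.ones((2, 2)).astype('float32'), fluid.CPUPlace())

        # Only convert plain dense tensors; sequence data keeps its LoD form.
        if not t.lod():
            arr = fluid.executor.as_numpy(t)
        else:
            arr = None  # handle sequence data with LoDTensor APIs instead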
@@ -263,42 +284,70 @@ class Executor(object):
 class Executor(object):
     """
-    An Executor in Python, supports single/multiple-GPU running, and single/multiple-CPU running.
-    Python executor takes a program, adds feed operators and fetch operators to this program according
-    to feed map and fetch_list. Feed map provides input data for the program. fetch_list provides
-    the variables(or names) that user wants to get after program runs. Note: the executor will run all
-    operators in the program but not only the operators dependent by the fetch_list.
-    It stores the global variables into the global scope, and creates a local scope for the temporary
-    variables. The contents in local scope may be discarded after every minibatch forward/backward
-    finished. But the global scope variables will be persistent through different runs.
-
-    Example:
+    An Executor in Python supports single/multiple-GPU running,
+    and single/multiple-CPU running. The Python executor takes a program,
+    adds feed operators and fetch operators to this program according
+    to the feed map and fetch_list. The feed map provides input data for the
+    program. fetch_list provides the variables (or names) that the user wants
+    to get after the program runs. Note: the executor will run all operators
+    in the program, not only the operators that the fetch_list depends on.
+    It stores the global variables into the global scope, and creates a
+    local scope for the temporary variables. The contents of the local scope
+    may be discarded after every minibatch forward/backward pass finishes,
+    but the global scope variables persist across different runs.
+
+    Examples:

         .. code-block:: python

-            # First create the Executor.
-            place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-            exe = fluid.Executor(place)
-
-            # Run the startup program once and only once.
-            # Not need to optimize/compile the startup program.
-            exe.run(fluid.default_startup_program())
-
-            # Run the main program directly without compile.
-            loss, = exe.run(fluid.default_main_program(),
-                            feed=feed_dict,
-                            fetch_list=[loss.name])
-
-            # Or, compiled the program and run. See `CompiledProgram` for more detail.
-            compiled_prog = compiler.CompiledProgram(
-                fluid.default_main_program()).with_data_parallel(
-                    loss_name=loss.name)
-            loss, = exe.run(compiled_prog,
-                            feed=feed_dict,
-                            fetch_list=[loss.name])
+            import paddle.fluid as fluid
+            import paddle.fluid.compiler as compiler
+            import numpy
+            import os
+
+            use_cuda = True
+            place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+            exe = fluid.Executor(place)
+
+            train_program = fluid.Program()
+            startup_program = fluid.Program()
+            with fluid.program_guard(train_program, startup_program):
+                data = fluid.layers.data(name='X', shape=[1], dtype='float32')
+                hidden = fluid.layers.fc(input=data, size=10)
+                loss = fluid.layers.mean(hidden)
+                fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)
+
+            # Run the startup program once and only once.
+            # No need to optimize/compile the startup program.
+            startup_program.random_seed=1
+            exe.run(startup_program)
+
+            # Run the main program directly without compiling it.
+            x = numpy.random.random(size=(10, 1)).astype('float32')
+            loss_data, = exe.run(train_program,
+                                 feed={"X": x},
+                                 fetch_list=[loss.name])
+
+            # Or, compile the program and then run it. See `CompiledProgram`
+            # for more details.
+            # NOTE: If you run the program on CPU, you need to specify
+            # CPU_NUM; otherwise, fluid will use all of the logical cores
+            # as CPU_NUM. In that case, the batch size of the input must be
+            # greater than CPU_NUM; if it is not, the process will fail
+            # with an exception.
+            if not use_cuda:
+                os.environ['CPU_NUM'] = str(2)
+
+            compiled_prog = compiler.CompiledProgram(
+                train_program).with_data_parallel(
+                    loss_name=loss.name)
+            loss_data, = exe.run(compiled_prog,
+                                 feed={"X": x},
+                                 fetch_list=[loss.name])

     Args:
-        place(core.CPUPlace|core.CUDAPlace(n)): indicate the executor run on which device
+        place(fluid.CPUPlace|fluid.CUDAPlace(n)): indicates on which device the executor runs.
     """

     def __init__(self, place):
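The docstring above states that global-scope variables persist across runs while local-scope temporaries may be discarded. A small sketch of reading a parameter back from the global scope after a run; the default parameter name 'fc_0.w_0' is an assumption about fluid's naming convention.

    .. code-block:: python

        import numpy
        import paddle.fluid as fluid

        exe = fluid.Executor(fluid.CPUPlace())

        data = fluid.layers.data(name='X', shape=[1], dtype='float32')
        hidden = fluid.layers.fc(input=data, size=10)
        loss = fluid.layers.mean(hidden)
        fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)
        exe.run(fluid.default_startup_program())

        x = numpy.random.random(size=(10, 1)).astype('float32')
        exe.run(fluid.default_main_program(), feed={'X': x}, fetch_list=[loss.name])

        # The fc weight is still available in the global scope after the run
        # ('fc_0.w_0' is the assumed default parameter name).
        w = numpy.array(fluid.global_scope().find_var('fc_0.w_0').get_tensor())
        print(w.shape)  # (1, 10)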
@@ -392,14 +441,18 @@ class Executor(object):
         Close this executor.

         You can no longer use this executor after calling this method.
-        For the distributed training, this method would free the resource on PServers related to
-        the current Trainer.
+        For distributed training, this method frees the resources
+        on PServers related to the current Trainer.

-        Example:
-            >>> cpu = core.CPUPlace()
-            >>> exe = Executor(cpu)
-            >>> ...
-            >>> exe.close()
+        Examples:
+            .. code-block:: python
+
+              import paddle.fluid as fluid
+
+              cpu = fluid.CPUPlace()
+              exe = fluid.Executor(cpu)
+              # execute training or testing
+              exe.close()
         """
         if not self._closed:
             self._default_executor.close()
@@ -490,13 +543,37 @@ class Executor(object):
                    return_numpy=True,
                    use_program_cache=False):
         """
-        Run program by this Executor. Feed data by feed map, fetch result by fetch_list.
-        Python executor takes a program, add feed operators and fetch operators to this program according
-        to feed map and fetch_list. Feed map provides input data for the program. fetch_list provides
-        the variables(or names) that user want to get after program run.
-        Note: the executor will run all
-        operators in the program but not only the operators dependent by the fetch_list
+        Run a program by this Executor. Feed data by the feed map, fetch the
+        result by fetch_list. The Python executor takes a program, adds feed
+        operators and fetch operators to this program according to the feed
+        map and fetch_list. The feed map provides input data for the program.
+        fetch_list provides the variables (or names) that the user wants to
+        get after the program runs. Note: the executor will run all operators
+        in the program, not only the operators that the fetch_list depends on.
+
+        Examples:
+            .. code-block:: python
+
+              import paddle.fluid as fluid
+              import numpy
+
+              # First create the Executor.
+              place = fluid.CPUPlace() # fluid.CUDAPlace(0)
+              exe = fluid.Executor(place)
+
+              data = fluid.layers.data(name='X', shape=[1], dtype='float32')
+              hidden = fluid.layers.fc(input=data, size=10)
+              loss = fluid.layers.mean(hidden)
+              adam = fluid.optimizer.Adam()
+              adam.minimize(loss)
+
+              # Run the startup program once and only once.
+              exe.run(fluid.default_startup_program())
+
+              x = numpy.random.random(size=(10, 1)).astype('float32')
+              outs = exe.run(feed={'X': x},
+                             fetch_list=[loss.name])

         Args:
             program(Program|CompiledProgram): the program that need to run,
@@ -520,26 +597,6 @@ class Executor(object):
         Returns:

             list(numpy.array): fetch result according to fetch_list.

-        Examples:
-
-            >>> data = fluid.layers.data(name='X', shape=[1], dtype='float32')
-            >>> out = fluid.layers.create_tensor(dtype='float32')
-            >>> hidden = fluid.layers.fc(input=data, size=10)
-            >>> fluid.layers.assign(hidden,out)
-            >>> loss = fluid.layers.mean(out)
-            >>> adam = fluid.optimizer.Adam()
-            >>> adam.minimize(loss)
-
-            >>> cpu = core.CPUPlace()
-            >>> exe = fluid.Executor(cpu)
-            >>> exe.run(fluid.default_startup_program())
-
-            >>> x = numpy.random.random(size=(10, 1)).astype('float32')
-            >>> outs = exe.run(
-            >>>     feed={'X': x},
-            >>>     fetch_list=[loss.name])
-
         """
         if self._closed:
...
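Executor.run also exposes return_numpy and use_program_cache (see the ArgSpec near the top of this diff), which the truncated Args list above does not show. A hedged sketch of a training loop that reuses the cached feed/fetch program and fetches two variables; whether caching actually speeds up this toy program is an assumption, the flag itself comes from the spec.

    .. code-block:: python

        import numpy
        import paddle.fluid as fluid

        exe = fluid.Executor(fluid.CPUPlace())

        data = fluid.layers.data(name='X', shape=[1], dtype='float32')
        hidden = fluid.layers.fc(input=data, size=10)
        loss = fluid.layers.mean(hidden)
        fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)
        exe.run(fluid.default_startup_program())

        for _ in range(5):
            x = numpy.random.random(size=(10, 1)).astype('float32')
            # Fetch both the loss and the fc output; reuse the cached program.
            loss_data, hidden_data = exe.run(fluid.default_main_program(),
                                             feed={'X': x},
                                             fetch_list=[loss.name, hidden.name],
                                             use_program_cache=True)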
@@ -37,6 +37,53 @@ class ParallelExecutor(object):
    is not found, ParallelExecutor will call `multiprocessing.cpu_count` to get the number
    of CPUs in the system.

+    Examples:
+        .. code-block:: python
+
+          import paddle.fluid as fluid
+          import numpy
+          import os
+
+          use_cuda = True
+          place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+
+          # NOTE: If you run the program on CPU, you need to specify
+          # CPU_NUM; otherwise, fluid will use all of the logical cores
+          # as CPU_NUM. In that case, the batch size of the input must be
+          # greater than CPU_NUM; if it is not, the process will fail
+          # with an exception.
+          if not use_cuda:
+              os.environ['CPU_NUM'] = str(2)
+
+          exe = fluid.Executor(place)
+
+          train_program = fluid.Program()
+          startup_program = fluid.Program()
+          with fluid.program_guard(train_program, startup_program):
+              data = fluid.layers.data(name='X', shape=[1], dtype='float32')
+              hidden = fluid.layers.fc(input=data, size=10)
+              loss = fluid.layers.mean(hidden)
+              test_program = fluid.default_main_program().clone(for_test=True)
+              fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)
+
+          startup_program.random_seed=1
+          exe.run(startup_program)
+
+          train_exe = fluid.ParallelExecutor(use_cuda=use_cuda,
+                                             main_program=train_program,
+                                             loss_name=loss.name)
+          test_exe = fluid.ParallelExecutor(use_cuda=use_cuda,
+                                            main_program=test_program,
+                                            share_vars_from=train_exe)
+
+          x = numpy.random.random(size=(10, 1)).astype('float32')
+          loss_data, = train_exe.run(feed={"X": x},
+                                     fetch_list=[loss.name])
+
+          loss_data, = test_exe.run(feed={"X": x},
+                                    fetch_list=[loss.name])
+
     Args:
         use_cuda (bool): Whether to use CUDA or not.
         loss_name (str): The loss name must set in training. Default None.
@@ -66,16 +113,6 @@ class ParallelExecutor(object):
     Raises:
         TypeError: If share_vars_from is provided, but not ParallelExecutor object.

-    Examples:
-        .. code-block:: python
-
-          train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name)
-          test_exe = fluid.ParallelExecutor(use_cuda=True,
-                                            main_program=test_program,
-                                            share_vars_from=train_exe)
-          train_loss, = train_exe.run([loss.name], feed=feed_dict)
-          test_loss, = test_exe.run([loss.name], feed=feed_dict)
-
     """

     def __init__(self,
@@ -152,24 +189,58 @@ class ParallelExecutor(object):
        assume the data has been splitted into multiple devices, the each
        element in the list will be copied to each device directly.

-        For example, if the feed is a dict:
-
-        >>> exe = ParallelExecutor()
-        >>> # the image will be splitted into devices. If there is two devices
-        >>> # each device will process an image with shape (24, 1, 28, 28)
-        >>> exe.run(feed={'image': numpy.random.random(size=(48, 1, 28, 28))})
-
-        For example, if the feed is a list:
-
-        >>> exe = ParallelExecutor()
-        >>> # each device will process each element in the list.
-        >>> # the 1st device will process an image with shape (48, 1, 28, 28)
-        >>> # the 2nd device will process an image with shape (32, 1, 28, 28)
-        >>> #
-        >>> # you can use exe.device_count to get the device number.
-        >>> exe.run(feed=[{"image": numpy.random.random(size=(48, 1, 28, 28))},
-        >>>               {"image": numpy.random.random(size=(32, 1, 28, 28))},
-        >>>              ])
+        Examples:
+            .. code-block:: python
+
+              import paddle.fluid as fluid
+              import numpy
+              import os
+
+              use_cuda = True
+              place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+
+              # NOTE: If you run the program on CPU, you need to specify
+              # CPU_NUM; otherwise, fluid will use all of the logical cores
+              # as CPU_NUM. In that case, the batch size of the input must be
+              # greater than CPU_NUM; if it is not, the process will fail
+              # with an exception.
+              if not use_cuda:
+                  os.environ['CPU_NUM'] = str(2)
+
+              exe = fluid.Executor(place)
+
+              train_program = fluid.Program()
+              startup_program = fluid.Program()
+              with fluid.program_guard(train_program, startup_program):
+                  data = fluid.layers.data(name='X', shape=[1], dtype='float32')
+                  hidden = fluid.layers.fc(input=data, size=10)
+                  loss = fluid.layers.mean(hidden)
+                  fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)
+
+              startup_program.random_seed=1
+              exe.run(startup_program)
+
+              train_exe = fluid.ParallelExecutor(use_cuda=use_cuda,
+                                                 main_program=train_program,
+                                                 loss_name=loss.name)
+
+              # If the feed is a dict:
+              # the input will be split across the devices. If there are two
+              # devices, each device will process a slice with shape (5, 1).
+              x = numpy.random.random(size=(10, 1)).astype('float32')
+              loss_data, = train_exe.run(feed={"X": x},
+                                         fetch_list=[loss.name])
+
+              # If the feed is a list:
+              # each device will process one element of the list;
+              # the 1st device will process a batch with shape (10, 1),
+              # the 2nd device will process a batch with shape (9, 1).
+              #
+              # You can use exe.device_count to get the device number.
+              x2 = numpy.random.random(size=(9, 1)).astype('float32')
+              loss_data, = train_exe.run(feed=[{"X": x}, {"X": x2}],
+                                         fetch_list=[loss.name])

     Args:
         fetch_list(list): The fetched variable names
...