未验证 提交 14f1973d 编写于 作者: K kangguangli 提交者: GitHub

[with_data_parallel][part13] remove with_data_parallel in example code (#51588)

* remove with_data_parallel in example code

* revert python/paddle/fluid/data_feeder.py

* fix static.nn.fc api
上级 5b3c7ee7
...@@ -372,17 +372,13 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT ...@@ -372,17 +372,13 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT
Examples: Examples:
.. code-block:: python .. code-block:: python
import os
import paddle import paddle
import paddle.static as static import paddle.static as static
paddle.enable_static() paddle.enable_static()
os.environ['CPU_NUM'] = str(2)
places = static.cpu_places()
data = static.data(name="x", shape=[None, 1], dtype="float32") data = static.data(name="x", shape=[None, 1], dtype="float32")
hidden = static.nn.fc(input=data, size=10) hidden = static.nn.fc(data, size=10)
loss = paddle.mean(hidden) loss = paddle.mean(hidden)
paddle.optimizer.SGD(learning_rate=0.01).minimize(loss) paddle.optimizer.SGD(learning_rate=0.01).minimize(loss)
...@@ -390,10 +386,7 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT ...@@ -390,10 +386,7 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT
build_strategy.enable_inplace = True build_strategy.enable_inplace = True
build_strategy.memory_optimize = True build_strategy.memory_optimize = True
build_strategy.reduce_strategy = static.BuildStrategy.ReduceStrategy.Reduce build_strategy.reduce_strategy = static.BuildStrategy.ReduceStrategy.Reduce
program = static.CompiledProgram(static.default_main_program()) program = static.CompiledProgram(static.default_main_program(), build_strategy=build_strategy)
program = program.with_data_parallel(loss_name=loss.name,
build_strategy=build_strategy,
places=places)
)DOC"); )DOC");
py::enum_<BuildStrategy::ReduceStrategy>(build_strategy, "ReduceStrategy") py::enum_<BuildStrategy::ReduceStrategy>(build_strategy, "ReduceStrategy")
...@@ -461,7 +454,6 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT ...@@ -461,7 +454,6 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT
.. code-block:: python .. code-block:: python
import numpy import numpy
import os
import paddle import paddle
import paddle.static as static import paddle.static as static
...@@ -471,20 +463,8 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT ...@@ -471,20 +463,8 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
exe = static.Executor(place) exe = static.Executor(place)
# NOTE: If you run the program on CPU, you need to
# specify CPU_NUM; otherwise, Paddle will use the
# number of logical cores as CPU_NUM. In that case,
# the batch size of the input should be greater than
# CPU_NUM; if it is not, the process will fail with
# an exception.
if not use_cuda:
os.environ['CPU_NUM'] = str(2)
places = static.cpu_places()
else:
places = static.cuda_places()
data = static.data(name='X', shape=[None, 1], dtype='float32') data = static.data(name='X', shape=[None, 1], dtype='float32')
hidden = static.nn.fc(input=data, size=10) hidden = static.nn.fc(data, size=10)
loss = paddle.mean(hidden) loss = paddle.mean(hidden)
paddle.optimizer.SGD(learning_rate=0.01).minimize(loss) paddle.optimizer.SGD(learning_rate=0.01).minimize(loss)
...@@ -492,19 +472,18 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT ...@@ -492,19 +472,18 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT
build_strategy = static.BuildStrategy() build_strategy = static.BuildStrategy()
build_strategy.gradient_scale_strategy = \ build_strategy.gradient_scale_strategy = \
static.BuildStrategy.GradientScaleStrategy.Customized static.BuildStrategy.GradientScaleStrategy.Customized
compiled_prog = static.CompiledProgram( compiled_prog = static.CompiledProgram(
static.default_main_program()).with_data_parallel( static.default_main_program(),
loss_name=loss.name, build_strategy=build_strategy, build_strategy=build_strategy,
places=places) )
dev_count = len(places)
x = numpy.random.random(size=(10, 1)).astype('float32') x = numpy.random.random(size=(10, 1)).astype('float32')
loss_grad = numpy.ones((dev_count)).astype("float32") * 0.01 loss_grad = numpy.ones((1)).astype("float32") * 0.01
loss_grad_name = loss.name+"@GRAD" loss_grad_name = loss.name+"@GRAD"
loss_data = exe.run(compiled_prog, loss_data = exe.run(compiled_prog,
feed={"X": x, loss_grad_name : loss_grad}, feed={"X": x, loss_grad_name : loss_grad},
fetch_list=[loss.name, loss_grad_name]) fetch_list=[loss.name, loss_grad_name])
)DOC") )DOC")
.def_property( .def_property(
"debug_graphviz_path", "debug_graphviz_path",
......
...@@ -82,18 +82,6 @@ def _has_optimize_op(block): ...@@ -82,18 +82,6 @@ def _has_optimize_op(block):
return False return False
def _has_optimizer_in_control_flow(program):
    """Tell whether a ``conditional_block_grad`` sub-block holds an optimize op.

    Args:
        program: The program to inspect. When it is falsy,
            ``framework.default_main_program()`` is inspected instead.

    Returns:
        bool: True if, for at least one op of type
        ``"conditional_block_grad"`` in the program's global block,
        ``_has_optimize_op`` reports a truthy result for the block referenced
        by that op's ``"sub_block"`` attribute; False otherwise.
    """
    target = program if program else framework.default_main_program()
    # Only the ops of the global block are scanned; ``any`` stops at the
    # first sub-block for which ``_has_optimize_op`` is truthy.
    return any(
        _has_optimize_op(target.block(op._block_attr_id("sub_block")))
        for op in target.global_block().ops
        if op.type == "conditional_block_grad"
    )
def _should_broadcast_or_not_exists(program, var_name): def _should_broadcast_or_not_exists(program, var_name):
block = program.global_block() block = program.global_block()
var = block.vars.get(var_name, None) var = block.vars.get(var_name, None)
......
...@@ -798,21 +798,13 @@ class DataLoader: ...@@ -798,21 +798,13 @@ class DataLoader:
# Define network # Define network
loss = simple_net(image, label) loss = simple_net(image, label)
# Set data source of DataLoader
#
# If DataLoader is iterable, places must be given and the number of places must be the same as the number of devices.
# - If you are using GPU, call `paddle.static.cuda_places()` to get all GPU places.
# - If you are using CPU, call `paddle.static.cpu_places()` to get all CPU places.
#
# If DataLoader is not iterable, places can be None.
places = static.cuda_places() if USE_GPU else static.cpu_places() places = static.cuda_places() if USE_GPU else static.cpu_places()
set_data_source(loader, places) set_data_source(loader, places)
exe = static.Executor(places[0]) exe = static.Executor(places[0])
exe.run(static.default_startup_program()) exe.run(static.default_startup_program())
prog = static.CompiledProgram(static.default_main_program()).with_data_parallel(loss_name=loss.name) prog = static.CompiledProgram(static.default_main_program())
if loader.iterable: if loader.iterable:
train_iterable(exe, prog, loss, loader) train_iterable(exe, prog, loss, loader)
else: else:
...@@ -890,54 +882,6 @@ class DataLoader: ...@@ -890,54 +882,6 @@ class DataLoader:
print("Epoch {} batch {}: loss = {}".format( print("Epoch {} batch {}: loss = {}".format(
epoch_id, batch_id, np.mean(loss.numpy()))) epoch_id, batch_id, np.mean(loss.numpy())))
Examples 3:
.. code-block:: python
'''
Example of using `drop_last` in static graph multi-card mode
'''
import paddle
import paddle.static as static
import numpy as np
import os
# We use 2 CPU cores to run inference network
os.environ['CPU_NUM'] = '2'
paddle.enable_static()
# The data source has only 3 batches, which cannot be
# divided evenly among the CPU cores
def batch_generator():
for i in range(3):
yield np.array([i+1]).astype('float32'),
x = static.data(name='x', shape=[None], dtype='float32')
y = x * x
def run_inference(drop_last):
loader = paddle.io.DataLoader.from_generator(feed_list=[x],
capacity=8, drop_last=drop_last)
loader.set_batch_generator(batch_generator, static.cpu_places())
exe = static.Executor(paddle.CPUPlace())
prog = static.CompiledProgram(static.default_main_program())
prog = prog.with_data_parallel()
result = []
for data in loader():
each_ret, = exe.run(prog, feed=data, fetch_list=[y])
result.extend(each_ret)
return result
# Set drop_last to True, so that the last batch whose
# number is less than CPU core number would be discarded.
print(run_inference(drop_last=True)) # [1.0, 4.0]
# Set drop_last to False, so that the last batch whose
# number is less than CPU core number can be tested.
print(run_inference(drop_last=False)) # [1.0, 4.0, 9.0]
""" """
if _non_static_mode(): if _non_static_mode():
return DygraphGeneratorLoader( return DygraphGeneratorLoader(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册