diff --git a/paddle/fluid/pybind/parallel_executor.cc b/paddle/fluid/pybind/parallel_executor.cc index 5c49c1d904eb27598015e8b64e72864a56b145e8..d23b9e568148725742e0ccf6f6d81141f3558f5a 100644 --- a/paddle/fluid/pybind/parallel_executor.cc +++ b/paddle/fluid/pybind/parallel_executor.cc @@ -372,17 +372,13 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT Examples: .. code-block:: python - import os import paddle import paddle.static as static paddle.enable_static() - os.environ['CPU_NUM'] = str(2) - places = static.cpu_places() - data = static.data(name="x", shape=[None, 1], dtype="float32") - hidden = static.nn.fc(input=data, size=10) + hidden = static.nn.fc(data, size=10) loss = paddle.mean(hidden) paddle.optimizer.SGD(learning_rate=0.01).minimize(loss) @@ -390,10 +386,7 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT build_strategy.enable_inplace = True build_strategy.memory_optimize = True build_strategy.reduce_strategy = static.BuildStrategy.ReduceStrategy.Reduce - program = static.CompiledProgram(static.default_main_program()) - program = program.with_data_parallel(loss_name=loss.name, - build_strategy=build_strategy, - places=places) + program = static.CompiledProgram(static.default_main_program(), build_strategy=build_strategy) )DOC"); py::enum_(build_strategy, "ReduceStrategy") @@ -461,7 +454,6 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT .. code-block:: python import numpy - import os import paddle import paddle.static as static @@ -471,20 +463,8 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() exe = static.Executor(place) - # NOTE: If you use CPU to run the program, you need - # to specify the CPU_NUM, otherwise, paddle will use - # all the number of the logic core as the CPU_NUM, - # in that case, the batch size of the input should be - # greater than CPU_NUM, if not, the process will be - # failed by an exception. - if not use_cuda: - os.environ['CPU_NUM'] = str(2) - places = static.cpu_places() - else: - places = static.cuda_places() - data = static.data(name='X', shape=[None, 1], dtype='float32') - hidden = static.nn.fc(input=data, size=10) + hidden = static.nn.fc(data, size=10) loss = paddle.mean(hidden) paddle.optimizer.SGD(learning_rate=0.01).minimize(loss) @@ -492,19 +472,18 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT build_strategy = static.BuildStrategy() build_strategy.gradient_scale_strategy = \ - static.BuildStrategy.GradientScaleStrategy.Customized + static.BuildStrategy.GradientScaleStrategy.Customized compiled_prog = static.CompiledProgram( - static.default_main_program()).with_data_parallel( - loss_name=loss.name, build_strategy=build_strategy, - places=places) + static.default_main_program(), + build_strategy=build_strategy, + ) - dev_count = len(places) x = numpy.random.random(size=(10, 1)).astype('float32') - loss_grad = numpy.ones((dev_count)).astype("float32") * 0.01 + loss_grad = numpy.ones((1)).astype("float32") * 0.01 loss_grad_name = loss.name+"@GRAD" loss_data = exe.run(compiled_prog, - feed={"X": x, loss_grad_name : loss_grad}, - fetch_list=[loss.name, loss_grad_name]) + feed={"X": x, loss_grad_name : loss_grad}, + fetch_list=[loss.name, loss_grad_name]) )DOC") .def_property( "debug_graphviz_path", diff --git a/python/paddle/fluid/compiler.py b/python/paddle/fluid/compiler.py index 8be433ae828397f2884d6935ae57456ede5dc0e8..977fcbdf15f0d053570ee55b058f45337bd02cfd 100644 --- a/python/paddle/fluid/compiler.py +++ b/python/paddle/fluid/compiler.py @@ -82,18 +82,6 @@ def _has_optimize_op(block): return False -def _has_optimizer_in_control_flow(program): - if not program: - program = framework.default_main_program() - for op in program.global_block().ops: - if op.type == "conditional_block_grad": - sub_block = program.block(op._block_attr_id("sub_block")) - if _has_optimize_op(sub_block): - return True - - return False - - def _should_broadcast_or_not_exists(program, var_name): block = program.global_block() var = block.vars.get(var_name, None) diff --git a/python/paddle/fluid/reader.py b/python/paddle/fluid/reader.py index 8e36dd28005be7cf0c21423ae93d8e6a62c2d88e..b9ed17304c8910e53590bfa410d4e08436935ed5 100644 --- a/python/paddle/fluid/reader.py +++ b/python/paddle/fluid/reader.py @@ -798,21 +798,13 @@ class DataLoader: # Define network loss = simple_net(image, label) - # Set data source of DataLoader - # - # If DataLoader is iterable, places must be given and the number of places must be the same with device number. - # - If you are using GPU, call `paddle.static.cuda_places()` to get all GPU places. - # - If you are using CPU, call `paddle.static.cpu_places()` to get all CPU places. - # - # If DataLoader is not iterable, places can be None. places = static.cuda_places() if USE_GPU else static.cpu_places() set_data_source(loader, places) exe = static.Executor(places[0]) exe.run(static.default_startup_program()) - prog = static.CompiledProgram(static.default_main_program()).with_data_parallel(loss_name=loss.name) - + prog = static.CompiledProgram(static.default_main_program()) if loader.iterable: train_iterable(exe, prog, loss, loader) else: @@ -890,54 +882,6 @@ class DataLoader: print("Epoch {} batch {}: loss = {}".format( epoch_id, batch_id, np.mean(loss.numpy()))) - Examples 3: - - .. code-block:: python - - ''' - Example of `drop_last` using in static graph multi-cards mode - ''' - import paddle - import paddle.static as static - import numpy as np - import os - - # We use 2 CPU cores to run inference network - os.environ['CPU_NUM'] = '2' - - paddle.enable_static() - - # The data source has only 3 batches, which can not be - # divided evenly to each CPU core - def batch_generator(): - for i in range(3): - yield np.array([i+1]).astype('float32'), - - x = static.data(name='x', shape=[None], dtype='float32') - y = x * x - - def run_inference(drop_last): - loader = paddle.io.DataLoader.from_generator(feed_list=[x], - capacity=8, drop_last=drop_last) - loader.set_batch_generator(batch_generator, static.cpu_places()) - - exe = static.Executor(paddle.CPUPlace()) - prog = static.CompiledProgram(static.default_main_program()) - prog = prog.with_data_parallel() - - result = [] - for data in loader(): - each_ret, = exe.run(prog, feed=data, fetch_list=[y]) - result.extend(each_ret) - return result - - # Set drop_last to True, so that the last batch whose - # number is less than CPU core number would be discarded. - print(run_inference(drop_last=True)) # [1.0, 4.0] - - # Set drop_last to False, so that the last batch whose - # number is less than CPU core number can be tested. - print(run_inference(drop_last=False)) # [1.0, 4.0, 9.0] """ if _non_static_mode(): return DygraphGeneratorLoader(