diff --git a/python/paddle/fluid/compiler.py b/python/paddle/fluid/compiler.py
index fd773fabd08fa79e79c552aececf99215cbf84a1..8be433ae828397f2884d6935ae57456ede5dc0e8 100644
--- a/python/paddle/fluid/compiler.py
+++ b/python/paddle/fluid/compiler.py
@@ -182,156 +182,6 @@ class CompiledProgram:
         self._build_strategy = build_strategy
         self._exec_strategy = None
 
-    def with_data_parallel(
-        self,
-        loss_name=None,
-        build_strategy=None,
-        exec_strategy=None,
-        share_vars_from=None,
-        places=None,
-    ):
-        """
-        This interface is used to transform the input Program or Graph to a multi-graph
-        to run the model in data parallel mode. Users can use the build_strategy and
-        exec_strategy to set some optimizations that can be applied during the construction
-        and computation of the Graph, such as reducing the number of AllReduce operations,
-        specifying the size of the thread pool used in the computation Graph running the model,
-        and so on.
-
-        .. note::
-            If build_strategy is specified when building CompiledProgram and calling
-            with_data_parallel, build_strategy in CompiledProgram will be overwritten, therefore,
-            if it is data parallel training, it is recommended to set build_strategy when calling
-            with_data_parallel interface.
-
-        Args:
-            loss_name (str): This parameter is the name of the loss Tensor of the model.
-                **Note: If it is model training, you must set loss_name, otherwise the
-                result may be problematic**. The default is None.
-            build_strategy(BuildStrategy): This parameter is used to compile the
-                program or graph with the specified options, such as operators' fusion
-                in the computational graph and memory optimization during the execution
-                of the computational graph. For more information about build_strategy,
-                please refer to :code:`fluid.BuildStrategy`. The default is None.
-            exec_strategy(ExecutionStrategy): exec_strategy specifies the options that can
-                be changed when running the current model, such as the thread pool size.
-                For more information about exec_strategy, please refer to :code:`fluid.ExecutionStrategy`.
-                The default is None.
-            share_vars_from(CompiledProgram): If share_vars_from is set, the current
-                CompiledProgram will share the parameter value with the CompiledProgram
-                specified by share_vars_from. This parameter needs to be set when model testing
-                is required during model training, and the data parallel mode is used for
-                training and testing. Since CompiledProgram will only distribute parameter
-                Tensors to other devices when it is first executed, the CompiledProgram
-                specified by share_vars_from must be run before the current CompiledProgram.
-                The default is None.
-            places(list(CUDAPlace)|list(CPUPlace)|list(str)|None): This parameter specifies the device
-                on which the model is running. If you want to run on GPU0 and GPU1, places are
-                [fluid.CUDAPlace(0), fluid.CUDAPlace(1)]; if you want to run with 2 CPUs, places are
-                [fluid.CPUPlace()] * 2. If the parameter is not set, i.e. the parameter is None,
-                the available device will be obtained from the environment variable when the model
-                is executed: If the GPU is used, the currently available device ID is obtained
-                from the environment variable FLAGS_selected_gpus or CUDA_VISIBLE_DEVICES when
-                the model is executed; CPU, when the model is executed, the currently available
-                CPU number is obtained from the environment variable CPU_NUM. For example,
-                export CPU_NUM=4, if the environment variable is not set, the executor will
-                add the variable to the environment variable and set its value to 1.
-                The default is None. If ``places`` is the list of string, the string in the list
-                can be ``cpu``, ``gpu:x``, where ``x`` is the index of the GPUs.
-
-        Returns:
-            CompiledProgram
-
-        Example:
-            .. code-block:: python
-
-                import numpy
-                import os
-                import paddle
-                import paddle.static as static
-
-                paddle.enable_static()
-
-                use_cuda = True
-                place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
-                parallel_places = [paddle.CUDAPlace(0), paddle.CUDAPlace(1)] if use_cuda else [paddle.CPUPlace()] * 2
-
-                # NOTE: If you use CPU to run the program, you need
-                # to specify the CPU_NUM, otherwise, paddle will use
-                # all the number of the logic core as the CPU_NUM,
-                # in that case, the batch size of the input should be
-                # greater than CPU_NUM, if not, the process will be
-                # failed by an exception.
-                if not use_cuda:
-                    os.environ['CPU_NUM'] = str(2)
-
-                exe = static.Executor(place)
-
-                data = static.data(name='X', shape=[None, 1], dtype='float32')
-                hidden = static.nn.fc(x=data, size=10)
-                loss = paddle.mean(hidden)
-
-                test_program = static.default_main_program().clone(for_test=True)
-                paddle.optimizer.SGD(learning_rate=0.01).minimize(loss)
-
-                exe.run(static.default_startup_program())
-                compiled_train_prog = static.CompiledProgram(
-                    static.default_main_program()).with_data_parallel(
-                        loss_name=loss.name, places=parallel_places)
-                # NOTE: if not set share_vars_from=compiled_train_prog,
-                # the parameters used in test process are different with
-                # the parameters used by train process
-                compiled_test_prog = static.CompiledProgram(
-                    test_program).with_data_parallel(
-                        share_vars_from=compiled_train_prog,
-                        places=parallel_places)
-
-                train_data = numpy.random.random(size=(10, 1)).astype('float32')
-                loss_data, = exe.run(compiled_train_prog,
-                                     feed={"X": train_data},
-                                     fetch_list=[loss.name])
-                test_data = numpy.random.random(size=(10, 1)).astype('float32')
-                loss_data, = exe.run(compiled_test_prog,
-                                     feed={"X": test_data},
-                                     fetch_list=[loss.name])
-        """
-        assert (
-            not self._is_data_parallel
-        ), "Already compiled with parallel, cannot be recompiled."
-        assert (
-            not self._is_inference
-        ), "Cannot compile with both data parallel and inference."
-        self._is_data_parallel = True
-        # FIXME(zcd): Currently, the build_strategy can be set during creating
-        # CompiledProgram or calling with_data_parallel, and it may be confusing,
-        # but in the long run, we should set up build_strategy only when creating
-        # CompiledProgram, and exec_strategy should be deprecated.
-        if build_strategy is not None:
-            self._build_strategy = build_strategy
-        self._exec_strategy = exec_strategy
-        self._loss_name = loss_name
-        self._share_vars_from = share_vars_from
-        if isinstance(places, (list, tuple)):
-            self._places = _get_paddle_place_list(places)
-        else:
-            self._places = _get_paddle_place(places)
-
-        if _has_backward_op(self._graph):
-            assert (
-                self._loss_name is not None
-            ), "The loss name of CompiledProgram is None. The loss name should be set if CompiledProgram contains backward part."
-
-        if self._places is not None:
-            if not isinstance(self._places, (list, tuple)):
-                self._places = [self._places]
-        if self._places is not None and len(self._places) > 1:
-            raise NotImplementedError(
-                "If you need to train with multi-gpus, please use `fleet` instead of `with_data_parallel`."
-                "This will be removed soon in develop version."
-            )
-
-        return self
-
     def _with_inference_optimize(self, config):
         """Add inference optimize
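
The removed method's NotImplementedError already points users at `fleet` for multi-device training. The snippet below is a minimal migration sketch, not part of this patch: it assumes a Paddle 2.x static-graph setup, the collective `fleet` API, and a per-process launch via `python -m paddle.distributed.launch --gpus "0,1" train.py` (which sets FLAGS_selected_gpus for each worker). Names such as `compiled_train_prog` simply mirror the example in the removed docstring.

import os
import numpy
import paddle
import paddle.static as static
import paddle.distributed.fleet as fleet

paddle.enable_static()
# Collective mode takes over the multi-device graph transformation that
# with_data_parallel() used to perform inside CompiledProgram.
fleet.init(is_collective=True)

data = static.data(name='X', shape=[None, 1], dtype='float32')
hidden = static.nn.fc(x=data, size=10)
loss = paddle.mean(hidden)

# fleet.distributed_optimizer wraps the base optimizer and inserts the
# collective communication ops during minimize().
strategy = fleet.DistributedStrategy()
sgd = fleet.distributed_optimizer(
    paddle.optimizer.SGD(learning_rate=0.01), strategy=strategy)
sgd.minimize(loss)

# Each launched process drives one device; the GPU id comes from the
# environment prepared by paddle.distributed.launch.
device_id = int(os.environ.get("FLAGS_selected_gpus", "0"))
exe = static.Executor(paddle.CUDAPlace(device_id))
exe.run(static.default_startup_program())

# CompiledProgram is still used, just without with_data_parallel().
compiled_train_prog = static.CompiledProgram(static.default_main_program())
train_data = numpy.random.random(size=(10, 1)).astype('float32')
loss_data, = exe.run(compiled_train_prog,
                     feed={"X": train_data},
                     fetch_list=[loss.name])

Because every worker process runs the same program on its own card, the `places` list from the old API has no direct equivalent here; device assignment is handled by the launcher.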