diff --git a/python/paddle/fluid/data_feeder.py b/python/paddle/fluid/data_feeder.py index f96a2d282740dc943ae0fac2427e8f8efdaf5252..c859778b3757f638ac531620f241e684522add57 100644 --- a/python/paddle/fluid/data_feeder.py +++ b/python/paddle/fluid/data_feeder.py @@ -79,6 +79,61 @@ class DataToLoDTensorConverter(object): class DataFeeder(object): + """ + DataFeeder converts the data that returned by a reader into a data + structure that can feed into Executor and ParallelExecutor. The reader + usually returns a list of mini-batch data entries. Each data entry in + the list is one sample. Each sample is a list or a tuple with one + feature or multiple features. + + The simple usage shows below: + + .. code-block:: python + + place = fluid.CPUPlace() + img = fluid.layers.data(name='image', shape=[1, 28, 28]) + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + feeder = fluid.DataFeeder([img, label], fluid.CPUPlace()) + result = feeder.feed([([0] * 784, [9]), ([1] * 784, [1])]) + + + If you want to feed data into GPU side separately in advance when you + use multi-GPU to train a model, you can use `decorate_reader` function. + + .. code-block:: python + + place=fluid.CUDAPlace(0) + feeder = fluid.DataFeeder(place=place, feed_list=[data, label]) + reader = feeder.decorate_reader( + paddle.batch(flowers.train(), batch_size=16)) + + Args: + feed_list(list): The Variables or Variables'name that will + feed into model. + place(Place): place indicates feed data into CPU or GPU, if you want to + feed data into GPU, please using `fluid.CUDAPlace(i)` (`i` represents + the GPU id), or if you want to feed data into CPU, please using + `fluid.CPUPlace()`. + program(Program): The Program that will feed data into, if program + is None, it will use default_main_program(). Default None. + + Raises: + ValueError: If some Variable is not in this Program. + + Examples: + .. code-block:: python + + # ... + place = fluid.CPUPlace() + feed_list = [ + main_program.global_block().var(var_name) for var_name in feed_vars_name + ] # feed_vars_name is a list of variables' name. + feeder = fluid.DataFeeder(feed_list, place) + for data in reader(): + outs = exe.run(program=main_program, + feed=feeder.feed(data)) + """ + def __init__(self, feed_list, place, program=None): self.feed_dtypes = [] self.feed_names = [] @@ -108,6 +163,16 @@ class DataFeeder(object): self.place = place def feed(self, iterable): + """ + According to feed_list and iterable, converters the input into + a data structure that can feed into Executor and ParallelExecutor. + + Args: + iterable(list|tuple): the input data. + + Returns: + dict: the result of conversion. + """ converter = [] for lod_level, shape, dtype in six.zip( self.feed_lod_level, self.feed_shapes, self.feed_dtypes): @@ -130,6 +195,20 @@ class DataFeeder(object): return ret_dict def feed_parallel(self, iterable, num_places=None): + """ + Takes multiple mini-batches. Each mini-batch will be feed on each + device in advance. + + Args: + iterable(list|tuple): the input data. + num_places(int): the number of devices. Default None. + + Returns: + dict: the result of conversion. + + Notes: + The number of devices and number of mini-batches must be same. + """ if isinstance(self.place, core.CUDAPlace): places = [ core.CUDAPlace(i) @@ -168,6 +247,24 @@ class DataFeeder(object): multi_devices, num_places=None, drop_last=True): + """ + Converter the input data into a data that returned by reader into + multiple mini-batches. Each mini-batch will be feed on each device. + + Args: + reader(fun): the input data. + multi_devices(bool): the number of places. Default None. + num_places(int): the number of places. Default None. + drop_last(bool): the number of places. Default None. + + Returns: + dict: the result of conversion. + + Raises: + ValueError: If drop_last is False and the data batch which cannot + fit for devices. + """ + def __reader_creator__(): if not multi_devices: for item in reader(): diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index 33d8f709412b25d29c6618272500dd7b953d6645..81e94096144f58df51315c5a7f597925dda735c3 100644 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -135,14 +135,18 @@ def has_fetch_operators(block, fetch_targets, fetch_holder_name): def fetch_var(name, scope=None, return_numpy=True): """ - Fetch the value of the variable with the given name from the given scope + Fetch the value of the variable with the given name from the + given scope. + Args: name(str): name of the variable. Typically, only persistable variables can be found in the scope used for running the program. scope(core.Scope|None): scope object. It should be the scope where you pass to Executor.run() when running your program. - If None, global_scope() will be used. - return_numpy(bool): whether convert the tensor to numpy.ndarray + If None, global_scope() will be used. Default None. + return_numpy(bool): whether convert the tensor to numpy.ndarray. + Default True. + Returns: LodTensor|numpy.ndarray """