diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 779a6842ebb03e2afcdb7718f77eb9b0d2c09a83..8244b91d32dd85e905c9df9f6015b29b633d1260 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -95,7 +95,6 @@ if (WITH_TESTING) add_subdirectory(paddle/fluid/tests) add_subdirectory(paddle/fluid/contrib/tests) add_subdirectory(paddle/fluid/contrib/slim/tests) - add_subdirectory(paddle/incubate/hapi/tests) endif() install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR} DESTINATION opt/paddle/share/wheels diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 46b84697e5a61e164cbc826d5018db7a6d87f69f..f98c8f5ee6643f09fcfde21b24b5b9bea510129b 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -256,8 +256,6 @@ from .device import get_device # from .tensor.tensor import LoDTensor #DEFINE_ALIAS # from .tensor.tensor import LoDTensorArray #DEFINE_ALIAS -from . import incubate -from .incubate import hapi from .fluid.dygraph.base import enable_dygraph as disable_static #DEFINE_ALIAS from .fluid.dygraph.base import disable_dygraph as enable_static #DEFINE_ALIAS from .fluid.framework import in_dygraph_mode as in_dynamic_mode #DEFINE_ALIAS @@ -265,3 +263,9 @@ from .fluid.dygraph.base import no_grad #DEFINE_ALIAS from . import jit from . import static + +# high-level api +from .hapi import Model +from .hapi import callbacks +import paddle.text +import paddle.vision diff --git a/python/paddle/dataset/common.py b/python/paddle/dataset/common.py index 9060b8c0ddb433b9a21f4fc67161b46f139e4e13..5eba18776c9643077d79e2b6b3c9a239bebec637 100644 --- a/python/paddle/dataset/common.py +++ b/python/paddle/dataset/common.py @@ -196,3 +196,14 @@ def cluster_files_reader(files_pattern, yield line return reader + + +def _check_exists_and_download(path, url, md5, module_name, download=True): + if path and os.path.exists(path): + return path + + if download: + return paddle.dataset.common.download(url, module_name, md5) + else: + raise ValueError('{} does not exist and auto download is disabled'.format( + path)) diff --git a/python/paddle/dataset/wmt16.py b/python/paddle/dataset/wmt16.py index 99fab7ffceb9278505ab7dc1bfee9bdcb4e188ba..251e305104edc72fe79da34286e98bc8cc81c3c7 100644 --- a/python/paddle/dataset/wmt16.py +++ b/python/paddle/dataset/wmt16.py @@ -36,7 +36,7 @@ import tarfile import gzip from collections import defaultdict -import paddle.dataset.common +import paddle import paddle.compat as cpt __all__ = [ diff --git a/python/paddle/device.py b/python/paddle/device.py index e2ef8e7092ad3f6af91c8d5d3c0b1deaed025514..de24fd875130e84d6532d033761f68a5c77a68c2 100644 --- a/python/paddle/device.py +++ b/python/paddle/device.py @@ -13,9 +13,11 @@ # limitations under the License. # TODO: define the functions to manipulate devices +import re + from paddle.fluid import core from paddle.fluid import framework -import re +from paddle.fluid.dygraph.parallel import ParallelEnv __all__ = [ 'get_cudnn_version', @@ -81,8 +83,8 @@ def set_device(device): .. code-block:: python
import paddle - paddle.enable_imperative() - paddle.fluid.dygraph.set_device("gpu:0") + paddle.disable_static() + paddle.set_device("cpu") x1 = paddle.ones(name='x1', shape=[1, 2], dtype='int32') x2 = paddle.zeros(name='x2', shape=[1, 2], dtype='int32') data = paddle.stack([x1,x2], axis=1) @@ -90,18 +92,28 @@ def set_device(device): lower_device = device.lower() if lower_device == 'cpu': place = core.CPUPlace() - framework._set_expected_place(place) + elif lower_device == 'gpu': + if not core.is_compiled_with_cuda(): + raise ValueError( + "The device should not be 'gpu', " \ + "since PaddlePaddle is not compiled with CUDA") + place = core.CUDAPlace(ParallelEnv().dev_id) else: - avaliable_device = ((lower_device == 'cpu') or - re.match(r'gpu:\d+', lower_device)) + avaliable_device = re.match(r'gpu:\d+', lower_device) if not avaliable_device: raise ValueError( - "The device must be a string which is like 'cpu' or 'gpu:0'") + "The device must be a string which is like 'cpu', 'gpu' or 'gpu:0'" + ) + if not core.is_compiled_with_cuda(): + raise ValueError( + "The device should not be {}, since PaddlePaddle is " \ + "not compiled with CUDA".format(device)) device_info_list = device.split(':', 1) device_id = device_info_list[1] device_id = int(device_id) place = core.CUDAPlace(device_id) - framework._set_expected_place(place) + framework._set_expected_place(place) + return place def get_device(): @@ -116,8 +128,8 @@ def get_device(): .. code-block:: python import paddle - paddle.enable_imperative() - device = paddle.fluid.dygraph.get_device() + paddle.disable_static() + device = paddle.get_device() """ device = '' diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index e3755cbafea41e61352f67c3de040e700297b61a..5662284483bf529034e42178c8a431f6286e31b8 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -69,7 +69,7 @@ class ImperativeQuantAware(object): from paddle.fluid.contrib.slim.quantization \ import ImperativeQuantAware - from paddle.incubate.hapi.vision.models \ + from paddle.vision.models \ import resnet model = resnet.resnet50(pretrained=True) diff --git a/python/paddle/fluid/dataloader/batch_sampler.py b/python/paddle/fluid/dataloader/batch_sampler.py index 1d180329b72510de5e7e9362e4c002f4508ba1be..085dcf6592de5193d62e43e4e74e4527818071de 100644 --- a/python/paddle/fluid/dataloader/batch_sampler.py +++ b/python/paddle/fluid/dataloader/batch_sampler.py @@ -16,10 +16,12 @@ from __future__ import print_function from __future__ import division import numpy as np +import math + from .sampler import Sampler, SequenceSampler, RandomSampler from .dataset import Dataset, IterableDataset -__all__ = ["BatchSampler"] +__all__ = ["BatchSampler", "DistributedBatchSampler"] class BatchSampler(Sampler): @@ -158,3 +160,185 @@ class _InfiniteIterableSampler(object): def __iter__(self): while True: yield [None] * self.batch_size + + +class DistributedBatchSampler(BatchSampler): + """Sampler that restricts data loading to a subset of the dataset. + + In such case, each process can pass a DistributedBatchSampler instance + as a DataLoader sampler, and load a subset of the original dataset that + is exclusive to it. + + .. note:: + Dataset is assumed to be of constant size.
+ + Args: + dataset(paddle.io.Dataset): this could be a `paddle.io.Dataset` implementation + or other python object which implements + `__len__` for BatchSampler to get sample + number of data source. + batch_size(int): the number of sample indices in each mini-batch. + num_replicas(int, optional): process number in distributed training. + If :attr:`num_replicas` is None, :attr:`num_replicas` will be + retrieved from :code:`paddle.fluid.dygraph.parallel.ParallelEnv`. + Default None. + rank(int, optional): the rank of the current process among :attr:`num_replicas` + processes. If :attr:`rank` is None, :attr:`rank` is retrieved from + :code:`paddle.fluid.dygraph.parallel.ParallelEnv`. Default None. + shuffle(bool): whether to shuffle indices order before generating + batch indices. Default False. + drop_last(bool): whether to drop the last incomplete batch when dataset size + is not divisible by the batch size. Default False. + + Examples: + .. code-block:: python + + import numpy as np + + from paddle.io import Dataset, DistributedBatchSampler + + # init with dataset + class RandomDataset(Dataset): + def __init__(self, num_samples): + self.num_samples = num_samples + + def __getitem__(self, idx): + image = np.random.random([784]).astype('float32') + label = np.random.randint(0, 9, (1, )).astype('int64') + return image, label + + def __len__(self): + return self.num_samples + + dataset = RandomDataset(100) + sampler = DistributedBatchSampler(dataset, batch_size=64) + + for data in sampler: + # do something + break + """ + + def __init__(self, + dataset, + batch_size, + num_replicas=None, + rank=None, + shuffle=False, + drop_last=False): + self.dataset = dataset + + assert isinstance(batch_size, int) and batch_size > 0, \ + "batch_size should be a positive integer" + self.batch_size = batch_size + assert isinstance(shuffle, bool), \ + "shuffle should be a boolean value" + self.shuffle = shuffle + assert isinstance(drop_last, bool), \ + "drop_last should be a boolean value" + + from paddle.fluid.dygraph.parallel import ParallelEnv + + if num_replicas is not None: + assert isinstance(num_replicas, int) and num_replicas > 0, \ + "num_replicas should be a positive integer" + self.nranks = num_replicas + else: + self.nranks = ParallelEnv().nranks + + if rank is not None: + assert isinstance(rank, int) and rank >= 0, \ + "rank should be a non-negative integer" + self.local_rank = rank + else: + self.local_rank = ParallelEnv().local_rank + + self.drop_last = drop_last + self.epoch = 0 + self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.nranks)) + self.total_size = self.num_samples * self.nranks + + def __iter__(self): + num_samples = len(self.dataset) + indices = np.arange(num_samples).tolist() + indices += indices[:(self.total_size - len(indices))] + assert len(indices) == self.total_size + if self.shuffle: + np.random.RandomState(self.epoch).shuffle(indices) + self.epoch += 1 + + # subsample + def _get_indices_by_batch_size(indices): + subsampled_indices = [] + last_batch_size = self.total_size % (self.batch_size * self.nranks) + assert last_batch_size % self.nranks == 0 + last_local_batch_size = last_batch_size // self.nranks + + for i in range(self.local_rank * self.batch_size, + len(indices) - last_batch_size, + self.batch_size * self.nranks): + subsampled_indices.extend(indices[i:i + self.batch_size]) + + indices = indices[len(indices) - last_batch_size:] + subsampled_indices.extend(indices[ + self.local_rank * last_local_batch_size:( + self.local_rank + 1) * last_local_batch_size]) + return subsampled_indices
+ + if self.nranks > 1: + indices = _get_indices_by_batch_size(indices) + + assert len(indices) == self.num_samples + _sample_iter = iter(indices) + + batch_indices = [] + for idx in _sample_iter: + batch_indices.append(idx) + if len(batch_indices) == self.batch_size: + yield batch_indices + batch_indices = [] + if not self.drop_last and len(batch_indices) > 0: + yield batch_indices + + def __len__(self): + num_samples = self.num_samples + num_samples += int(not self.drop_last) * (self.batch_size - 1) + return num_samples // self.batch_size + + def set_epoch(self, epoch): + """ + Sets the epoch number. When :attr:`shuffle=True`, this number is used + as the seed of the random ordering. By default, users need not set this; + all replicas (workers) use a different random ordering for each epoch. + If the same number is set at each epoch, this sampler will yield the same + ordering at all epochs. + + Arguments: + epoch (int): Epoch number. + + Examples: + .. code-block:: python + + import numpy as np + + from paddle.io import Dataset, DistributedBatchSampler + + # init with dataset + class RandomDataset(Dataset): + def __init__(self, num_samples): + self.num_samples = num_samples + + def __getitem__(self, idx): + image = np.random.random([784]).astype('float32') + label = np.random.randint(0, 9, (1, )).astype('int64') + return image, label + + def __len__(self): + return self.num_samples + + dataset = RandomDataset(100) + sampler = DistributedBatchSampler(dataset, batch_size=64) + + for epoch in range(10): + sampler.set_epoch(epoch) + """ + self.epoch = epoch diff --git a/python/paddle/fluid/tests/unittests/test_batch_sampler.py b/python/paddle/fluid/tests/unittests/test_batch_sampler.py index 6ec6fdb59f200ce1dc9b6418b7f11329f85ba5dd..4faef77dad40dd3a9c0a8e5cf1b0d4438c1b7a8a 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_sampler.py +++ b/python/paddle/fluid/tests/unittests/test_batch_sampler.py @@ -18,6 +18,7 @@ import unittest import paddle.fluid as fluid from paddle.io import BatchSampler, Dataset, Sampler, SequenceSampler, RandomSampler +from paddle.io import DistributedBatchSampler class RandomDataset(Dataset): @@ -194,5 +195,15 @@ class TestBatchSamplerWithSamplerShuffle(unittest.TestCase): pass +class TestDistributedBatchSamplerWithSampler(TestBatchSampler): + def init_batch_sampler(self): + dataset = RandomDataset(1000, 10) + bs = DistributedBatchSampler( + dataset=dataset, + batch_size=self.batch_size, + drop_last=self.drop_last) + return bs + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/incubate/hapi/__init__.py b/python/paddle/hapi/__init__.py similarity index 69% rename from python/paddle/incubate/hapi/__init__.py rename to python/paddle/hapi/__init__.py index c0361fa33246ff3315a107c520972ca6bebc8168..87f5a82525cdfa36e48d40c6d12488d359fe99db 100644 --- a/python/paddle/incubate/hapi/__init__.py +++ b/python/paddle/hapi/__init__.py @@ -13,34 +13,15 @@ # limitations under the License. from . import logger -from . import progressbar from . import callbacks -from . import download from . import model from .model import * -from . import datasets -from . import distributed -from . import vision -from . import text -from . import utils -from . import device
-from .device import * - from .dygraph_layer_patch import monkey_patch_layer logger.setup_logger() -__all__ = [ - 'callbacks', - 'datasets', - 'distributed', - 'download', - 'vision', - 'text', - 'utils', -] + model.__all__ + device.__all__ +__all__ = ['callbacks'] + model.__all__ monkey_patch_layer() diff --git a/python/paddle/incubate/hapi/callbacks.py b/python/paddle/hapi/callbacks.py similarity index 88% rename from python/paddle/incubate/hapi/callbacks.py rename to python/paddle/hapi/callbacks.py index 0804708210a9749813e195a8b5579b339986acd6..7ed571fa9c6a4a962b20397c999368dad0734ff0 100644 --- a/python/paddle/incubate/hapi/callbacks.py +++ b/python/paddle/hapi/callbacks.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os + from paddle.fluid.dygraph.parallel import ParallelEnv from .progressbar import ProgressBar @@ -117,10 +119,10 @@ class Callback(object): .. code-block:: python - from paddle.incubate.hapi.callbacks import Callback + import paddle # build a simple model checkpoint callback - class ModelCheckpoint(Callback): + class ModelCheckpoint(paddle.callbacks.Callback): def __init__(self, save_freq=1, save_dir=None): self.save_freq = save_freq self.save_dir = save_dir @@ -147,12 +149,12 @@ class Callback(object): - 'verbose': an integer. Verbose mode is 0, 1 or 2. 0 = silent, 1 = progress bar, 2 = one line per epoch. - 'metrics': a list of str. Names of metrics, including 'loss' - and the names of hapi.Metric. + and the names of paddle.metric.Metric. """ self.params = params def set_model(self, model): - """model is instance of hapi.Model. + """model is an instance of paddle.Model. """ self.model = model @@ -168,7 +170,7 @@ Args: logs (dict): The logs is a dict or None. The keys of logs - passed by hapi.Model contains 'loss', metric names and + passed by paddle.Model contains 'loss', metric names and `batch_size`. """ @@ -177,10 +179,10 @@ Args: logs (dict): The logs is a dict or None. The keys of logs - passed by hapi.Model contains 'steps' and 'metrics', + passed by paddle.Model contains 'steps' and 'metrics', The `steps` is number of total steps of validation dataset. The `metrics` is a list of str including 'loss' and the names - of hapi.Metric. + of paddle.metric.Metric. """ def on_eval_end(self, logs=None): @@ -188,7 +190,7 @@ Args: logs (dict): The logs is a dict or None. The `logs` passed by - hapi.Model is a dict contains 'loss', metrics and 'batch_size' + paddle.Model is a dict contains 'loss', metrics and 'batch_size' of last batch of validation dataset. """ @@ -212,7 +214,7 @@ Args: epoch (int): The index of epoch. logs (dict): The logs is a dict or None. The `logs` passed by - hapi.Model is None. + paddle.Model is None. """ def on_epoch_end(self, epoch, logs=None): @@ -221,7 +223,7 @@ Args: epoch (int): The index of epoch. logs (dict): The logs is a dict or None. The `logs` passed by - hapi.Model is a dict, contains 'loss', metrics and 'batch_size' + paddle.Model is a dict, contains 'loss', metrics and 'batch_size' of last batch. """ @@ -231,7 +233,7 @@ Args: step (int): The index of step (or iteration). logs (dict): The logs is a dict or None. The `logs` passed by - hapi.Model is empty. + paddle.Model is empty.
""" def on_train_batch_end(self, step, logs=None): @@ -240,7 +242,7 @@ class Callback(object): Args: step (int): The index of step (or iteration). logs (dict): The logs is a dict or None. The `logs` passed by - hapi.Model is a dict, contains 'loss', metrics and 'batch_size' + paddle.Model is a dict, contains 'loss', metrics and 'batch_size' of current batch. """ @@ -250,7 +252,7 @@ class Callback(object): Args: step (int): The index of step (or iteration). logs (dict): The logs is a dict or None. The `logs` passed by - hapi.Model is empty. + paddle.Model is empty. """ def on_eval_batch_end(self, step, logs=None): @@ -259,7 +261,7 @@ class Callback(object): Args: step (int): The index of step (or iteration). logs (dict): The logs is a dict or None. The `logs` passed by - hapi.Model is a dict, contains 'loss', metrics and 'batch_size' + paddle.Model is a dict, contains 'loss', metrics and 'batch_size' of current batch. """ @@ -292,23 +294,22 @@ class ProgBarLogger(Callback): .. code-block:: python import paddle - import paddle.fluid as fluid - import paddle.incubate.hapi as hapi + from paddle.static import InputSpec - inputs = [hapi.Input([-1, 1, 28, 28], 'float32', 'image')] - labels = [hapi.Input([None, 1], 'int64', 'label')] + inputs = [InputSpec([-1, 1, 28, 28], 'float32', 'image')] + labels = [InputSpec([None, 1], 'int64', 'label')] - train_dataset = hapi.datasets.MNIST(mode='train') + train_dataset = paddle.vision.datasets.MNIST(mode='train') - model = hapi.Model(hapi.vision.LeNet(classifier_activation=None), + model = paddle.Model(paddle.vision.LeNet(classifier_activation=None), inputs, labels) - optim = fluid.optimizer.Adam(0.001) + optim = paddle.optimizer.Adam(0.001) model.prepare(optimizer=optim, loss=paddle.nn.CrossEntropyLoss(), metrics=paddle.metric.Accuracy()) - callback = hapi.callbacks.ProgBarLogger(log_freq=10) + callback = paddle.callbacks.ProgBarLogger(log_freq=10) model.fit(train_dataset, batch_size=64, callbacks=callback) """ @@ -428,23 +429,22 @@ class ModelCheckpoint(Callback): .. 
import paddle - import paddle.fluid as fluid - import paddle.incubate.hapi as hapi + from paddle.static import InputSpec - inputs = [hapi.Input([-1, 1, 28, 28], 'float32', 'image')] - labels = [hapi.Input([None, 1], 'int64', 'label')] + inputs = [InputSpec([-1, 1, 28, 28], 'float32', 'image')] + labels = [InputSpec([None, 1], 'int64', 'label')] - train_dataset = hapi.datasets.MNIST(mode='train') + train_dataset = paddle.vision.datasets.MNIST(mode='train') - model = hapi.Model(hapi.vision.LeNet(classifier_activation=None), + model = paddle.Model(paddle.vision.models.LeNet(classifier_activation=None), inputs, labels) - optim = fluid.optimizer.Adam(0.001) + optim = paddle.optimizer.Adam(0.001) model.prepare(optimizer=optim, loss=paddle.nn.CrossEntropyLoss(), metrics=paddle.metric.Accuracy()) - callback = hapi.callbacks.ModelCheckpoint(save_dir='./temp') + callback = paddle.callbacks.ModelCheckpoint(save_dir='./temp') model.fit(train_dataset, batch_size=64, callbacks=callback) """ @@ -461,11 +461,11 @@ class ModelCheckpoint(Callback): def on_epoch_end(self, epoch, logs=None): if self._is_save() and self.epoch % self.save_freq == 0: path = '{}/{}'.format(self.save_dir, epoch) - print('save checkpoint at {}'.format(path)) + print('save checkpoint at {}'.format(os.path.abspath(path))) self.model.save(path) def on_train_end(self, logs=None): if self._is_save(): path = '{}/final'.format(self.save_dir) - print('save checkpoint at {}'.format(path)) + print('save checkpoint at {}'.format(os.path.abspath(path))) self.model.save(path) diff --git a/python/paddle/incubate/hapi/dygraph_layer_patch.py b/python/paddle/hapi/dygraph_layer_patch.py similarity index 98% rename from python/paddle/incubate/hapi/dygraph_layer_patch.py rename to python/paddle/hapi/dygraph_layer_patch.py index cb3cc10a84dd9347bf4b781031bedb5836dfbd4c..e3a2948b69305fcb08c14c850f5738ac46aea2be 100644 --- a/python/paddle/incubate/hapi/dygraph_layer_patch.py +++ b/python/paddle/hapi/dygraph_layer_patch.py @@ -16,8 +16,7 @@ import warnings import paddle.fluid as fluid from paddle.fluid.framework import in_dygraph_mode - -from .device import _get_device +from paddle.fluid.framework import _current_expected_place as _get_device def monkey_patch_layer(): diff --git a/python/paddle/incubate/hapi/logger.py b/python/paddle/hapi/logger.py similarity index 100% rename from python/paddle/incubate/hapi/logger.py rename to python/paddle/hapi/logger.py diff --git a/python/paddle/incubate/hapi/model.py b/python/paddle/hapi/model.py similarity index 86% rename from python/paddle/incubate/hapi/model.py rename to python/paddle/hapi/model.py index b52354d4ccf4671b0d372bae63a1befbe383e053..5aa689ca324c099f239a29e2ee21b8283e378341 100644 --- a/python/paddle/incubate/hapi/model.py +++ b/python/paddle/hapi/model.py @@ -22,37 +22,182 @@ import pickle import numpy as np import six import warnings +import time +import socket +import contextlib from collections import Iterable import paddle from paddle import fluid from paddle.fluid import core from paddle.fluid.framework import in_dygraph_mode, Variable, ParamBase, _current_expected_place -# Note: Use alias `Input` temporarily before releasing hapi feature.
-from paddle.static import InputSpec as Input +from paddle.fluid.framework import in_dygraph_mode, Variable +from paddle.fluid.framework import _current_expected_place as _get_device from paddle.fluid.executor import global_scope from paddle.fluid.io import is_belong_to_optimizer from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.parallel import ParallelEnv from paddle.fluid.dygraph.dygraph_to_static.program_translator import ProgramTranslator, FunctionSpec from paddle.fluid.layers.utils import flatten +from paddle.fluid.layers import collective from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy from paddle.fluid.incubate.fleet.base import role_maker -from paddle.fluid.executor import scope_guard, Executor -from paddle.io import DataLoader, Dataset +from paddle.io import DataLoader, Dataset, DistributedBatchSampler +from paddle.fluid.executor import scope_guard, Executor from paddle.fluid.dygraph.layers import Layer from paddle.metric import Metric -from .distributed import DistributedBatchSampler, _all_gather, prepare_distributed_context, _parallel_context_initialized +from paddle.static import InputSpec as Input + from .callbacks import config_callbacks -from .utils import to_list, to_numpy, flatten_list, restore_flatten_list, extract_args -from .device import _get_device -__all__ = [ - 'Model', - 'Input', -] +__all__ = ['Model', ] + +_parallel_context_initialized = False + + +def to_list(value): + if value is None: + return value + if isinstance(value, (list, tuple)): + return list(value) + return [value] + + +def to_numpy(var): + assert isinstance(var, (Variable, fluid.core.VarBase)), "not a variable" + if isinstance(var, fluid.core.VarBase): + return var.numpy() + t = global_scope().find_var(var.name).get_tensor() + return np.array(t) + + +def flatten_list(l): + assert isinstance(l, list), "not a list" + outl = [] + splits = [] + for sl in l: + assert isinstance(sl, list), "sub content not a list" + splits.append(len(sl)) + outl += sl + return outl, splits + + +def restore_flatten_list(l, splits): + outl = [] + for split in splits: + assert len(l) >= split, "list length invalid" + sl, l = l[:split], l[split:] + outl.append(sl) + return outl + + +def extract_args(func): + if hasattr(inspect, 'getfullargspec'): + return inspect.getfullargspec(func)[0] + else: + return inspect.getargspec(func)[0] + + +def _all_gather(x, nranks, ring_id=0, use_calc_stream=True): + return collective._c_allgather( + x, nranks, ring_id=ring_id, use_calc_stream=use_calc_stream) + + +def wait_server_ready(endpoints): + assert not isinstance(endpoints, six.string_types) + while True: + all_ok = True + not_ready_endpoints = [] + for ep in endpoints: + ip_port = ep.split(":") + with contextlib.closing( + socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: + sock.settimeout(2) + result = sock.connect_ex((ip_port[0], int(ip_port[1]))) + if result != 0: + all_ok = False + not_ready_endpoints.append(ep) + if not all_ok: + time.sleep(3) + else: + break + + +def init_communicator(program, rank, nranks, wait_port, current_endpoint, + endpoints): + if nranks < 2: + return + other_endpoints = endpoints[:] + other_endpoints.remove(current_endpoint) + if rank == 0 and wait_port: + wait_server_ready(other_endpoints) + block = program.global_block() + nccl_id_var = block.create_var( + name=fluid.unique_name.generate('nccl_id'), + persistable=True, + type=fluid.core.VarDesc.VarType.RAW) + + block.append_op( + type='c_gen_nccl_id', + inputs={}, + outputs={'Out': 
nccl_id_var}, + attrs={ + 'rank': rank, + 'endpoint': current_endpoint, + 'other_endpoints': other_endpoints + }) + + block.append_op( + type='c_comm_init', + inputs={'X': nccl_id_var}, + outputs={}, + attrs={ + 'nranks': nranks, + 'rank': rank, + 'ring_id': 0, + }) + + +def prepare_distributed_context(place=None): + if place is None: + place = fluid.CUDAPlace(ParallelEnv().dev_id) if ParallelEnv().nranks > 1 \ + else fluid.CUDAPlace(0) + + strategy = fluid.dygraph.parallel.ParallelStrategy() + strategy.nranks = ParallelEnv().nranks + strategy.local_rank = ParallelEnv().local_rank + strategy.trainer_endpoints = ParallelEnv().trainer_endpoints + strategy.current_endpoint = ParallelEnv().current_endpoint + + if strategy.nranks < 2: + return + + global _parallel_context_initialized + + if not _parallel_context_initialized and isinstance(place, fluid.CUDAPlace): + + def _init_context(): + communicator_prog = fluid.Program() + init_communicator(communicator_prog, strategy.local_rank, + strategy.nranks, True, strategy.current_endpoint, + strategy.trainer_endpoints) + exe = fluid.Executor(place) + exe.run(communicator_prog) + + if fluid.in_dygraph_mode(): + fluid.disable_dygraph() + _init_context() + fluid.enable_dygraph(place) + else: + _init_context() + + else: + raise AssertionError("Only support CUDAPlace for now.") + + _parallel_context_initialized = True + return strategy class StaticGraphAdapter(object): @@ -640,55 +785,51 @@ class Model(object): Dynamic graph and static graph are supported at the same time, switched by `paddle.disable_static()`. The usage is as follows. But note, the switching between dynamic and static should be before - instantiating a Model. The input description, i.e, hapi.Input, + instantiating a Model. The input description, i.e., paddle.static.InputSpec, must be required for static graph. Args: network (paddle.nn.Layer): The network is an instance of paddle.nn.Layer. - inputs (Input|list|dict|None): `inputs`, entry points of network, - could be a Input layer, or lits of Input layers, - or dict (name: Input), or None. For static graph, + inputs (InputSpec|list|dict|None): `inputs`, entry points of network, + could be an InputSpec instance, or a list of InputSpec instances, + or dict ({name: InputSpec}), or None. For static graph, inputs must be set. For dynamic graph, it could be None. - labels (Input|list|None): `labels`, entry points of network, - could be a Input layer or lits of Input layers, or None. - For static graph, if labels is required in loss, + labels (InputSpec|list|None): `labels`, entry points of network, + could be an InputSpec instance or a list of InputSpec instances, + or None. For static graph, if labels is required in loss, labels must be set. Otherwise, it could be None. - Usage: + Examples: .. code-block:: python import paddle - import paddle.incubate.hapi as hapi - - class MyNet(paddle.nn.Layer): - def __init__(self, classifier_act=None): - super(MyNet, self).__init__() - self._fc1 = paddle.nn.Linear(784, 200, act=classifier_act) - - def forward(self, x): - y = self._fc1(x) - return y - - device = hapi.set_device('gpu') + import paddle.nn as nn + from paddle.static import InputSpec + + device = paddle.set_device('cpu') # or 'gpu' # if use static graph, do not set paddle.disable_static(device) - + + net = nn.Sequential( + nn.Linear(784, 200), + nn.Tanh(), + nn.Linear(200, 10)) + # inputs and labels are not required for dynamic graph.
- input = hapi.Input([None, 784], 'float32', 'x') - label = hapi.Input([None, 1], 'int64', 'label') + input = InputSpec([None, 784], 'float32', 'x') + label = InputSpec([None, 1], 'int64', 'label') - model = hapi.Model(MyNet(), input, label) + model = paddle.Model(net, input, label) optim = paddle.optimizer.SGD(learning_rate=1e-3, - parameter_list=model.parameters()) + parameters=model.parameters()) model.prepare(optim, paddle.nn.CrossEntropyLoss(), paddle.metric.Accuracy()) - mnist_data = hapi.datasets.MNIST(mode='train', chw_format=False) - model.fit(mnist_data, epochs=2, batch_size=32, verbose=1) - + data = paddle.vision.datasets.MNIST(mode='train', chw_format=False) + model.fit(data, epochs=2, batch_size=32, verbose=1) """ def __init__(self, network, inputs=None, labels=None): @@ -736,25 +877,22 @@ class Model(object): import numpy as np import paddle - import paddle.incubate.hapi as hapi - - class MyNet(paddle.nn.Layer): - def __init__(self, classifier_act=None): - super(MyNet, self).__init__() - self._fc = paddle.nn.Linear(784, 10, act=classifier_act) - - def forward(self, x): - y = self._fc(x) - return y + import paddle.nn as nn + from paddle.static import InputSpec - device = hapi.set_device('gpu') + device = paddle.set_device('cpu') # or 'gpu' paddle.disable_static(device) - input = hapi.Input([None, 784], 'float32', 'x') - label = hapi.Input([None, 1], 'int64', 'label') - model = hapi.Model(MyNet(), input, label) + net = nn.Sequential( + nn.Linear(784, 200), + nn.Tanh(), + nn.Linear(200, 10)) + + input = InputSpec([None, 784], 'float32', 'x') + label = InputSpec([None, 1], 'int64', 'label') + model = paddle.Model(net, input, label) optim = paddle.optimizer.SGD(learning_rate=1e-3, - parameter_list=model.parameters()) + parameters=model.parameters()) model.prepare(optim, paddle.nn.CrossEntropyLoss()) data = np.random.random(size=(4,784)).astype(np.float32) label = np.random.randint(0, 10, size=(4, 1)).astype(np.int64) @@ -784,25 +922,22 @@ class Model(object): import numpy as np import paddle - import paddle.incubate.hapi as hapi - - class MyNet(paddle.nn.Layer): - def __init__(self, classifier_act=None): - super(MyNet, self).__init__() - self._fc = paddle.nn.Linear(784, 10, act=classifier_act) + import paddle.nn as nn + from paddle.static import InputSpec - def forward(self, x): - y = self._fc(x) - return y - - device = hapi.set_device('gpu') + device = paddle.set_device('cpu') # or 'gpu' paddle.disable_static(device) - input = hapi.Input([None, 784], 'float32', 'x') - label = hapi.Input([None, 1], 'int64', 'label') - model = hapi.Model(MyNet(), input, label) + net = nn.Sequential( + nn.Linear(784, 200), + nn.Tanh(), + nn.Linear(200, 10)) + + input = InputSpec([None, 784], 'float32', 'x') + label = InputSpec([None, 1], 'int64', 'label') + model = paddle.Model(net, input, label) optim = paddle.optimizer.SGD(learning_rate=1e-3, - parameter_list=model.parameters()) + parameters=model.parameters()) model.prepare(optim, paddle.nn.CrossEntropyLoss()) data = np.random.random(size=(4,784)).astype(np.float32) @@ -830,20 +965,18 @@ class Model(object): import numpy as np import paddle - import paddle.incubate.hapi as hapi + import paddle.nn as nn - class MyNet(paddle.nn.Layer): - def __init__(self): - super(MyNet, self).__init__() - self._fc = paddle.nn.Linear(784, 1, act='softmax') - def forward(self, x): - y = self._fc(x) - return y - - device = hapi.set_device('gpu') + device = paddle.set_device('cpu') # or 'gpu' paddle.disable_static(device) - model = hapi.Model(MyNet()) + net = 
nn.Sequential( + nn.Linear(784, 200), + nn.Tanh(), + nn.Linear(200, 10), + nn.Softmax()) + + model = paddle.Model(net) model.prepare() data = np.random.random(size=(4,784)).astype(np.float32) out = model.test_batch([data]) @@ -884,38 +1017,40 @@ class Model(object): Examples: .. code-block:: python + import paddle - import paddle.incubate.hapi as hapi - from paddle.nn import Linear - from paddle.incubate.hapi.datasets.mnist import MNIST as MnistDataset + import paddle.nn as nn + from paddle.static import InputSpec - class Mnist(paddle.nn.Layer): + class Mnist(nn.Layer): def __init__(self): super(Mnist, self).__init__() - self._fc = Linear(784, 10, act='softmax') + self.net = nn.Sequential( + nn.Linear(784, 200), + nn.Tanh(), + nn.Linear(200, 10), + nn.Softmax()) # If save for inference in dygraph, need this @paddle.jit.to_static def forward(self, x): - y = self._fc(x) - return y + return self.net(x) dynamic = True # False - device = hapi.set_device('cpu') + device = paddle.set_device('cpu') # if use static graph, do not set paddle.disable_static(device) if dynamic else None # inputs and labels are not required for dynamic graph. - input = hapi.Input([None, 784], 'float32', 'x') - label = hapi.Input([None, 1], 'int64', 'label') - model = hapi.Model(Mnist(), input, label) + input = InputSpec([None, 784], 'float32', 'x') + label = InputSpec([None, 1], 'int64', 'label') + model = paddle.Model(Mnist(), input, label) optim = paddle.optimizer.SGD(learning_rate=1e-3, - parameter_list=model.parameters()) + parameters=model.parameters()) model.prepare(optim, paddle.nn.CrossEntropyLoss()) - mnist_data = hapi.datasets.MNIST(mode='train', chw_format=False) - model.fit(mnist_data, epochs=1, batch_size=32, verbose=0) + data = paddle.vision.datasets.MNIST(mode='train', chw_format=False) + model.fit(data, epochs=1, batch_size=32, verbose=0) model.save('checkpoint/test') # save for training model.save('inference_model', False) # save for inference - """ if ParallelEnv().local_rank == 0: @@ -958,19 +1093,17 @@ class Model(object): .. code-block:: python import paddle - import paddle.incubate.hapi as hapi + import paddle.nn as nn - class MyNet(paddle.nn.Layer): - def __init__(self): - super(MyNet, self).__init__() - self._fc = paddle.nn.Linear(784, 1, act='softmax') - def forward(self, x): - y = self._fc(x) - return y - - device = hapi.set_device('cpu') + device = paddle.set_device('cpu') paddle.disable_static(device) - model = hapi.Model(MyNet()) + + model = paddle.Model(nn.Sequential( + nn.Linear(784, 200), + nn.Tanh(), + nn.Linear(200, 10), + nn.Softmax())) + model.save('checkpoint/test') model.load('checkpoint/test') """ @@ -1033,18 +1166,14 @@ class Model(object): .. code-block:: python import paddle - from paddle.incubate.hapi import Model - - class MyNet(paddle.nn.Layer): - def __init__(self): - super(MyNet, self).__init__() - self._fc = paddle.nn.Linear(20, 10, act='softmax') - def forward(self, x): - y = self._fc(x) - return y + import paddle.nn as nn paddle.disable_static() - model = Model(MyNet()) + + model = paddle.Model(nn.Sequential( + nn.Linear(784, 200), + nn.Tanh(), + nn.Linear(200, 10))) params = model.parameters() """ return self._adapter.parameters() @@ -1173,19 +1302,20 @@ class Model(object): .. 
code-block:: python import paddle - import paddle.incubate.hapi as hapi + from paddle.static import InputSpec dynamic = True - device = hapi.set_device('gpu') + device = paddle.set_device('cpu') # or 'gpu' paddle.disable_static(device) if dynamic else None - train_dataset = hapi.datasets.MNIST(mode='train') - val_dataset = hapi.datasets.MNIST(mode='test') + train_dataset = paddle.vision.datasets.MNIST(mode='train') + val_dataset = paddle.vision.datasets.MNIST(mode='test') - input = hapi.Input([None, 1, 28, 28], 'float32', 'image') - label = hapi.Input([None, 1], 'int64', 'label') + input = InputSpec([None, 1, 28, 28], 'float32', 'image') + label = InputSpec([None, 1], 'int64', 'label') - model = hapi.Model(hapi.vision.LeNet(classifier_activation=None), + model = paddle.Model( + paddle.vision.models.LeNet(classifier_activation=None), input, label) optim = paddle.optimizer.Adam( learning_rate=0.001, parameters=model.parameters()) @@ -1205,24 +1335,24 @@ class Model(object): .. code-block:: python import paddle - import paddle.incubate.hapi as hapi + from paddle.static import InputSpec dynamic = True - device = hapi.set_device('gpu') + device = paddle.set_device('cpu') # or 'gpu' paddle.disable_static(device) if dynamic else None - train_dataset = hapi.datasets.MNIST(mode='train') + train_dataset = paddle.vision.datasets.MNIST(mode='train') train_loader = paddle.io.DataLoader(train_dataset, places=device, batch_size=64) - val_dataset = hapi.datasets.MNIST(mode='test') + val_dataset = paddle.vision.datasets.MNIST(mode='test') val_loader = paddle.io.DataLoader(val_dataset, places=device, batch_size=64) - input = hapi.Input([None, 1, 28, 28], 'float32', 'image') - label = hapi.Input([None, 1], 'int64', 'label') + input = InputSpec([None, 1, 28, 28], 'float32', 'image') + label = InputSpec([None, 1], 'int64', 'label') - model = hapi.Model(hapi.vision.LeNet(classifier_activation=None), - input, label) + model = paddle.Model( + paddle.vision.models.LeNet(classifier_activation=None), input, label) optim = paddle.optimizer.Adam( learning_rate=0.001, parameters=model.parameters()) model.prepare( @@ -1341,22 +1471,21 @@ class Model(object): .. 
code-block:: python import paddle - import paddle.incubate.hapi as hapi + from paddle.static import InputSpec # declarative mode - val_dataset = hapi.datasets.MNIST(mode='test') + val_dataset = paddle.vision.datasets.MNIST(mode='test') - input = hapi.Input([-1, 1, 28, 28], 'float32', 'image') - label = hapi.Input([None, 1], 'int64', 'label') - model = hapi.Model(hapi.vision.LeNet(), input, label) + input = InputSpec([-1, 1, 28, 28], 'float32', 'image') + label = InputSpec([None, 1], 'int64', 'label') + model = paddle.Model(paddle.vision.models.LeNet(), input, label) model.prepare(metrics=paddle.metric.Accuracy()) - result = model.evaluate(val_dataset, batch_size=64) print(result) # imperative mode paddle.disable_static() - model = hapi.Model(hapi.vision.LeNet()) + model = paddle.Model(paddle.vision.models.LeNet()) model.prepare(metrics=paddle.metric.Accuracy()) result = model.evaluate(val_dataset, batch_size=64) print(result) @@ -1435,9 +1564,9 @@ class Model(object): import numpy as np import paddle - import paddle.incubate.hapi as hapi + from paddle.static import InputSpec - class MnistDataset(hapi.datasets.MNIST): + class MnistDataset(paddle.vision.datasets.MNIST): def __init__(self, mode, return_label=True): super(MnistDataset, self).__init__(mode=mode) self.return_label = return_label @@ -1454,17 +1583,17 @@ class Model(object): test_dataset = MnistDataset(mode='test', return_label=False) # declarative mode - input = hapi.Input([-1, 1, 28, 28], 'float32', 'image') - model = hapi.Model(hapi.vision.LeNet(), input) + input = InputSpec([-1, 1, 28, 28], 'float32', 'image') + model = paddle.Model(paddle.vision.models.LeNet(), input) model.prepare() result = model.predict(test_dataset, batch_size=64) print(len(result[0]), result[0][0].shape) # imperative mode - device = hapi.set_device('cpu') + device = paddle.set_device('cpu') paddle.disable_static(device) - model = hapi.Model(hapi.vision.LeNet()) + model = paddle.Model(paddle.vision.models.LeNet()) model.prepare() result = model.predict(test_dataset, batch_size=64) print(len(result[0]), result[0][0].shape) diff --git a/python/paddle/incubate/hapi/progressbar.py b/python/paddle/hapi/progressbar.py similarity index 98% rename from python/paddle/incubate/hapi/progressbar.py rename to python/paddle/hapi/progressbar.py index 2487fcbde8744fa7cc186e16b0653f03629d0366..c36e875ccb7d594e9cf2ccfe0654551ccbd66afc 100644 --- a/python/paddle/incubate/hapi/progressbar.py +++ b/python/paddle/hapi/progressbar.py @@ -66,6 +66,7 @@ class ProgressBar(object): return terminal_size(80, 24) terminal_width, _ = get_terminal_size() + terminal_width = terminal_width if terminal_width > 0 else 80 max_width = min(int(terminal_width * 0.6), terminal_width - 50) return max_width diff --git a/python/paddle/incubate/__init__.py b/python/paddle/incubate/__init__.py index aee695d419550c066fdb3eb6333780fedfe29429..2af9255971e65236bc6c73b90d2fcd6d14d7679a 100644 --- a/python/paddle/incubate/__init__.py +++ b/python/paddle/incubate/__init__.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from . 
import hapi - __all__ = [] -__all__ += hapi.__all__ __all__ += ["reader"] from ..fluid.contrib import reader diff --git a/python/paddle/incubate/hapi/device.py b/python/paddle/incubate/hapi/device.py deleted file mode 100644 index 3ff29822f6f45b7fb977b5888e7d26e293df5761..0000000000000000000000000000000000000000 --- a/python/paddle/incubate/hapi/device.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import six - -import paddle.fluid as fluid -from paddle.fluid.dygraph.parallel import ParallelEnv - -__all__ = ['set_device', ] - -# TODO(qingqing01): remove or refine _global_device, set_device and get_device -# after core framework supporting these function. -_global_device = None - - -def set_device(device): - """ - Args: - device (str): specify device type, 'cpu' or 'gpu'. - - Returns: - fluid.CUDAPlace or fluid.CPUPlace: Created GPU or CPU place. - - Examples: - .. code-block:: python - - import paddle.incubate.hapi as hapi - - input = hapi.set_device('gpu') - """ - - assert isinstance(device, six.string_types) and device.lower() in ['cpu', 'gpu'], \ - "Expected device in ['cpu', 'gpu'], but got {}".format(device) - - device = fluid.CUDAPlace(ParallelEnv().dev_id) \ - if device.lower() == 'gpu' and fluid.is_compiled_with_cuda() \ - else fluid.CPUPlace() - - global _global_device - _global_device = device - return device - - -def _get_device(): - """ - Return global device. - """ - if _global_device is not None: - device = _global_device - else: - if fluid.is_compiled_with_cuda(): - device = fluid.CUDAPlace(ParallelEnv().dev_id) - else: - device = fluid.CPUPlace() - return device diff --git a/python/paddle/incubate/hapi/distributed.py b/python/paddle/incubate/hapi/distributed.py deleted file mode 100644 index 0e38dc8edc758e9c1b8a96add1df242fb0aecef1..0000000000000000000000000000000000000000 --- a/python/paddle/incubate/hapi/distributed.py +++ /dev/null @@ -1,279 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import time -import math -import socket -import contextlib -import numpy as np - -from paddle import fluid -from paddle.fluid.layers import collective -from paddle.fluid.dygraph.parallel import ParallelEnv, ParallelStrategy -from paddle.io import BatchSampler - -_parallel_context_initialized = False - -__all__ = ['DistributedBatchSampler'] - - -class DistributedBatchSampler(BatchSampler): - """Sampler that restricts data loading to a subset of the dataset. - - In such case, each process can pass a DistributedBatchSampler instance - as a DataLoader sampler, and load a subset of the original dataset that - is exclusive to it. - - .. note:: - Dataset is assumed to be of constant size. - - Args: - dataset(paddle.io.Dataset): this could be a `paddle.io.Dataset` implement - or other python object which implemented - `__len__` for BatchSampler to get sample - number of data source. - batch_size(int): sample indice number in a mini-batch indices. - num_replicas(int, optional): porcess number in distributed training. - If :attr:`num_replicas` is None, :attr:`num_replicas` will be - retrieved from :code:`paddle.fluid.dygraph.parallel.ParallenEnv`. - Default None. - rank(int, optional): the rank of the current process among :attr:`num_replicas` - processes. If :attr:`rank` is None, :attr:`rank` is retrieved from - :code:`paddle.fluid.dygraph.parallel.ParallenEnv`. Default None. - shuffle(bool): whther to shuffle indices order before genrating - batch indices. Default False. - drop_last(bool): whether drop the last incomplete batch dataset size - is not divisible by the batch size. Default False - - Examples: - .. code-block:: python - - import numpy as np - - from paddle.incubate.hapi.datasets import MNIST - from paddle.incubate.hapi.distributed import DistributedBatchSampler - - class MnistDataset(MNIST): - def __init__(self, mode, return_label=True): - super(MnistDataset, self).__init__(mode=mode) - self.return_label = return_label - - def __getitem__(self, idx): - img = np.reshape(self.images[idx], [1, 28, 28]) - if self.return_label: - return img, np.array(self.labels[idx]).astype('int64') - return img, - - def __len__(self): - return len(self.images) - - train_dataset = MnistDataset(mode='train') - dist_train_dataloader = DistributedBatchSampler(train_dataset, batch_size=64) - - for data in dist_train_dataloader: - # do something - break - """ - - def __init__(self, - dataset, - batch_size, - num_replicas=None, - rank=None, - shuffle=False, - drop_last=False): - self.dataset = dataset - - assert isinstance(batch_size, int) and batch_size > 0, \ - "batch_size should be a positive integer" - self.batch_size = batch_size - assert isinstance(shuffle, bool), \ - "shuffle should be a boolean value" - self.shuffle = shuffle - assert isinstance(drop_last, bool), \ - "drop_last should be a boolean number" - - if num_replicas is not None: - assert isinstance(num_replicas, int) and num_replicas > 0, \ - "num_replicas should be a positive integer" - self.nranks = num_replicas - else: - self.nranks = ParallelEnv().nranks - - if rank is not None: - assert isinstance(rank, int) and rank >= 0, \ - "rank should be a non-negative integer" - self.local_rank = rank - else: - self.local_rank = ParallelEnv().local_rank - - self.drop_last = drop_last - self.epoch = 0 - self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.nranks)) - self.total_size = self.num_samples * self.nranks 
- - def __iter__(self): - num_samples = len(self.dataset) - indices = np.arange(num_samples).tolist() - indices += indices[:(self.total_size - len(indices))] - assert len(indices) == self.total_size - if self.shuffle: - np.random.RandomState(self.epoch).shuffle(indices) - self.epoch += 1 - - # subsample - def _get_indices_by_batch_size(indices): - subsampled_indices = [] - last_batch_size = self.total_size % (self.batch_size * self.nranks) - assert last_batch_size % self.nranks == 0 - last_local_batch_size = last_batch_size // self.nranks - - for i in range(self.local_rank * self.batch_size, - len(indices) - last_batch_size, - self.batch_size * self.nranks): - subsampled_indices.extend(indices[i:i + self.batch_size]) - - indices = indices[len(indices) - last_batch_size:] - subsampled_indices.extend(indices[ - self.local_rank * last_local_batch_size:( - self.local_rank + 1) * last_local_batch_size]) - return subsampled_indices - - if self.nranks > 1: - indices = _get_indices_by_batch_size(indices) - - assert len(indices) == self.num_samples - _sample_iter = iter(indices) - - batch_indices = [] - for idx in _sample_iter: - batch_indices.append(idx) - if len(batch_indices) == self.batch_size: - yield batch_indices - batch_indices = [] - if not self.drop_last and len(batch_indices) > 0: - yield batch_indices - - def __len__(self): - num_samples = self.num_samples - num_samples += int(not self.drop_last) * (self.batch_size - 1) - return num_samples // self.batch_size - - def set_epoch(self, epoch): - self.epoch = epoch - - -def _all_gather(x, nranks, ring_id=0, use_calc_stream=True): - return collective._c_allgather( - x, nranks, ring_id=ring_id, use_calc_stream=use_calc_stream) - - -def wait_server_ready(endpoints): - assert not isinstance(endpoints, six.string_types) - while True: - all_ok = True - not_ready_endpoints = [] - for ep in endpoints: - ip_port = ep.split(":") - with contextlib.closing( - socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: - sock.settimeout(2) - result = sock.connect_ex((ip_port[0], int(ip_port[1]))) - if result != 0: - all_ok = False - not_ready_endpoints.append(ep) - if not all_ok: - time.sleep(3) - else: - break - - -def init_communicator(program, rank, nranks, wait_port, current_endpoint, - endpoints): - if nranks < 2: - return - other_endpoints = endpoints[:] - other_endpoints.remove(current_endpoint) - if rank == 0 and wait_port: - wait_server_ready(other_endpoints) - block = program.global_block() - nccl_id_var = block.create_var( - name=fluid.unique_name.generate('nccl_id'), - persistable=True, - type=fluid.core.VarDesc.VarType.RAW) - - block.append_op( - type='c_gen_nccl_id', - inputs={}, - outputs={'Out': nccl_id_var}, - attrs={ - 'rank': rank, - 'endpoint': current_endpoint, - 'other_endpoints': other_endpoints - }) - - block.append_op( - type='c_comm_init', - inputs={'X': nccl_id_var}, - outputs={}, - attrs={ - 'nranks': nranks, - 'rank': rank, - 'ring_id': 0, - }) - - -def prepare_distributed_context(place=None): - if place is None: - place = fluid.CUDAPlace(ParallelEnv().dev_id) if ParallelEnv().nranks > 1 \ - else fluid.CUDAPlace(0) - - strategy = ParallelStrategy() - strategy.nranks = ParallelEnv().nranks - strategy.local_rank = ParallelEnv().local_rank - strategy.trainer_endpoints = ParallelEnv().trainer_endpoints - strategy.current_endpoint = ParallelEnv().current_endpoint - - if strategy.nranks < 2: - return - - global _parallel_context_initialized - - if not _parallel_context_initialized and isinstance(place, fluid.CUDAPlace): - - def 
_init_context(): - communicator_prog = fluid.Program() - init_communicator(communicator_prog, strategy.local_rank, - strategy.nranks, True, strategy.current_endpoint, - strategy.trainer_endpoints) - exe = fluid.Executor(place) - exe.run(communicator_prog) - - if fluid.in_dygraph_mode(): - fluid.disable_dygraph() - _init_context() - fluid.enable_dygraph(place) - else: - _init_context() - - else: - assert ("Only support CUDAPlace for now.") - - _parallel_context_initialized = True - return strategy diff --git a/python/paddle/incubate/hapi/tests/CMakeLists.txt b/python/paddle/incubate/hapi/tests/CMakeLists.txt deleted file mode 100644 index 8ffcd67443f1c8722560da20d9cfb76b18a67351..0000000000000000000000000000000000000000 --- a/python/paddle/incubate/hapi/tests/CMakeLists.txt +++ /dev/null @@ -1,46 +0,0 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") -string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") - -file(GLOB DIST_TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_dist_*.py") -string(REPLACE ".py" "" DIST_TEST_OPS "${DIST_TEST_OPS}") - - -foreach(TEST_OP ${DIST_TEST_OPS}) - list(REMOVE_ITEM TEST_OPS ${TEST_OP}) -endforeach() - -foreach(src ${TEST_OPS}) - py_test(${src} SRCS ${src}.py) -endforeach() -set_tests_properties(test_dataset_imdb PROPERTIES TIMEOUT 150) - - -function(py_dist_test TARGET_NAME) - if(WITH_TESTING) - set(options "") - set(oneValueArgs "") - set(multiValueArgs SRCS DEPS ARGS ENVS) - cmake_parse_arguments(py_dist_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - if(WITH_COVERAGE AND WITH_GPU AND WITH_NCCL AND NOT WIN32) - add_test(NAME ${TARGET_NAME} - COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true - FLAGS_cpu_deterministic=true NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 - PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_dist_test_ENVS} - COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data - ${PYTHON_EXECUTABLE} -u ${py_dist_test_SRCS} ${py_dist_test_ARGS} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - # No unit test should exceed 10 minutes. - set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=DIST") - endif() - - - endif() -endfunction() - - - -foreach(src ${DIST_TEST_OPS}) - message(STATUS ${src}) - py_dist_test(${src} SRCS ${src}.py) -endforeach() diff --git a/python/paddle/incubate/hapi/tests/test_uncombined_weight2state_dict.py b/python/paddle/incubate/hapi/tests/test_uncombined_weight2state_dict.py deleted file mode 100644 index 6df9b31217aae78c43de8d29956a8b2def99055b..0000000000000000000000000000000000000000 --- a/python/paddle/incubate/hapi/tests/test_uncombined_weight2state_dict.py +++ /dev/null @@ -1,124 +0,0 @@ -# copyright (c) 2020 paddlepaddle authors. all rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import division -from __future__ import print_function - -import unittest - -import numpy as np -import shutil -import tempfile - -from paddle import fluid -from paddle.nn import Conv2d, Pool2D, Linear, ReLU, Sequential, Softmax - -from paddle.incubate.hapi.utils import uncombined_weight_to_state_dict - - -class LeNetDygraph(fluid.dygraph.Layer): - def __init__(self, num_classes=10, classifier_activation='softmax'): - super(LeNetDygraph, self).__init__() - self.num_classes = num_classes - self.features = Sequential( - Conv2d( - 1, 6, 3, stride=1, padding=1), - ReLU(), - Pool2D(2, 'max', 2), - Conv2d( - 6, 16, 5, stride=1, padding=0), - ReLU(), - Pool2D(2, 'max', 2)) - - if num_classes > 0: - self.fc = Sequential( - Linear(400, 120), Linear(120, 84), Linear(84, 10), - Softmax()) #Todo: accept any activation - - def forward(self, inputs): - x = self.features(inputs) - - if self.num_classes > 0: - x = fluid.layers.flatten(x, 1) - x = self.fc(x) - return x - - -class TestUncombinedWeight2StateDict(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.save_dir = tempfile.mkdtemp() - - @classmethod - def tearDownClass(cls): - shutil.rmtree(cls.save_dir) - - def test_infer(self): - start_prog = fluid.Program() - train_prog = fluid.Program() - - x = fluid.data(name='x', shape=[None, 1, 28, 28], dtype='float32') - - with fluid.program_guard(train_prog, start_prog): - with fluid.unique_name.guard(): - x = fluid.data( - name='x', shape=[None, 1, 28, 28], dtype='float32') - model = LeNetDygraph() - output = model.forward(x) - - excutor = fluid.Executor() - excutor.run(start_prog) - - test_prog = train_prog.clone(for_test=True) - - fluid.io.save_params(excutor, self.save_dir, test_prog) - - rand_x = np.random.rand(1, 1, 28, 28).astype('float32') - out = excutor.run(program=test_prog, - feed={'x': rand_x}, - fetch_list=[output.name], - return_numpy=True) - - state_dict = uncombined_weight_to_state_dict(self.save_dir) - - key2key_dict = { - 'features.0.weight': 'conv2d_0.w_0', - 'features.0.bias': 'conv2d_0.b_0', - 'features.3.weight': 'conv2d_1.w_0', - 'features.3.bias': 'conv2d_1.b_0', - 'fc.0.weight': 'linear_0.w_0', - 'fc.0.bias': 'linear_0.b_0', - 'fc.1.weight': 'linear_1.w_0', - 'fc.1.bias': 'linear_1.b_0', - 'fc.2.weight': 'linear_2.w_0', - 'fc.2.bias': 'linear_2.b_0' - } - - fluid.enable_imperative() - dygraph_model = LeNetDygraph() - - converted_state_dict = dygraph_model.state_dict() - for k1, k2 in key2key_dict.items(): - converted_state_dict[k1] = state_dict[k2] - - dygraph_model.set_dict(converted_state_dict) - - dygraph_model.eval() - dy_out = dygraph_model(fluid.dygraph.to_variable(rand_x)) - - np.testing.assert_allclose(dy_out.numpy(), out[0], atol=1e-5) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/incubate/hapi/utils.py b/python/paddle/incubate/hapi/utils.py deleted file mode 100644 index d9708f2927912870218f41103df5b0f94609cd88..0000000000000000000000000000000000000000 --- a/python/paddle/incubate/hapi/utils.py +++ /dev/null @@ -1,221 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import inspect -import numpy as np - -from collections import OrderedDict -from paddle import fluid -from paddle.fluid.framework import Variable -from paddle.fluid.executor import global_scope - -__all__ = ['uncombined_weight_to_state_dict'] - - -def uncombined_weight_to_state_dict(weight_dir): - """ - Convert uncombined weight which getted by using `fluid.io.save_params` or `fluid.io.save_persistables` to state_dict - - Args: - weight_dir (str): weight direcotory path. - - Returns: - OrderDict: weight dict. - - Examples: - .. code-block:: python - - import os - - from paddle import fluid - from paddle.nn import Conv2D, Pool2D, Linear, ReLU, Sequential - from paddle.incubate.hapi.utils import uncombined_weight_to_state_dict - - - class LeNetDygraph(fluid.dygraph.Layer): - def __init__(self, num_classes=10, classifier_activation='softmax'): - super(LeNetDygraph, self).__init__() - self.num_classes = num_classes - self.features = Sequential( - Conv2D( - 1, 6, 3, stride=1, padding=1), - ReLU(), - Pool2D(2, 'max', 2), - Conv2D( - 6, 16, 5, stride=1, padding=0), - ReLU(), - Pool2D(2, 'max', 2)) - - if num_classes > 0: - self.fc = Sequential( - Linear(400, 120), - Linear(120, 84), - Linear( - 84, 10, act=classifier_activation)) - - def forward(self, inputs): - x = self.features(inputs) - - if self.num_classes > 0: - x = fluid.layers.flatten(x, 1) - x = self.fc(x) - return x - - # save weight use fluid.io.save_params - save_dir = 'temp' - if not os.path.exists(save_dir): - os.makedirs(save_dir) - - start_prog = fluid.Program() - train_prog = fluid.Program() - - x = fluid.data(name='x', shape=[None, 1, 28, 28], dtype='float32') - - with fluid.program_guard(train_prog, start_prog): - with fluid.unique_name.guard(): - x = fluid.data( - name='x', shape=[None, 1, 28, 28], dtype='float32') - model = LeNetDygraph() - output = model.forward(x) - - excutor = fluid.Executor() - excutor.run(start_prog) - - test_prog = train_prog.clone(for_test=True) - - fluid.io.save_params(excutor, save_dir, test_prog) - - # convert uncombined weight to state dict - state_dict = uncombined_weight_to_state_dict(save_dir) - - key2key_dict = { - 'features.0.weight': 'conv2d_0.w_0', - 'features.0.bias': 'conv2d_0.b_0', - 'features.3.weight': 'conv2d_1.w_0', - 'features.3.bias': 'conv2d_1.b_0', - 'fc.0.weight': 'linear_0.w_0', - 'fc.0.bias': 'linear_0.b_0', - 'fc.1.weight': 'linear_1.w_0', - 'fc.1.bias': 'linear_1.b_0', - 'fc.2.weight': 'linear_2.w_0', - 'fc.2.bias': 'linear_2.b_0' - } - - fluid.enable_imperative() - dygraph_model = LeNetDygraph() - - converted_state_dict = dygraph_model.state_dict() - for k1, k2 in key2key_dict.items(): - converted_state_dict[k1] = state_dict[k2] - - # dygraph model load state dict which converted from uncombined weight - dygraph_model.set_dict(converted_state_dict) - """ - - def _get_all_params_name(dir): - params_name = [] - dir = os.path.expanduser(dir) - - dir_len = len(dir) - for root, _, fnames in sorted(os.walk(dir, followlinks=True)): - for fname in sorted(fnames): - path = os.path.join(root[dir_len:], fname) - params_name.append(path) - - return params_name - - 
class Load(fluid.dygraph.Layer): - def __init__(self): - super(Load, self).__init__() - - def forward(self, filename): - weight = self.create_parameter( - shape=[1], - dtype='float32', - default_initializer=fluid.initializer.ConstantInitializer(0.0)) - self._helper.append_op( - type='load', - inputs={}, - outputs={'Out': [weight]}, - attrs={'file_path': filename}) - return weight - - params_name_list = _get_all_params_name(weight_dir) - if not fluid.in_dygraph_mode(): - dygraph_enabled = False - fluid.enable_imperative() - else: - dygraph_enabled = True - - load = Load() - state_dict = OrderedDict() - - for param_name in params_name_list: - param_path = os.path.join(weight_dir, param_name) - weight = load(param_path) - try: - weight = weight.numpy() - except Exception as e: - print(e) - - state_dict[param_name] = weight - - if not dygraph_enabled: - fluid.disable_imperative() - - return state_dict - - -def to_list(value): - if value is None: - return value - if isinstance(value, (list, tuple)): - return list(value) - return [value] - - -def to_numpy(var): - assert isinstance(var, (Variable, fluid.core.VarBase)), "not a variable" - if isinstance(var, fluid.core.VarBase): - return var.numpy() - t = global_scope().find_var(var.name).get_tensor() - return np.array(t) - - -def flatten_list(l): - assert isinstance(l, list), "not a list" - outl = [] - splits = [] - for sl in l: - assert isinstance(sl, list), "sub content not a list" - splits.append(len(sl)) - outl += sl - return outl, splits - - -def restore_flatten_list(l, splits): - outl = [] - for split in splits: - assert len(l) >= split, "list length invalid" - sl, l = l[:split], l[split:] - outl.append(sl) - return outl - - -def extract_args(func): - if hasattr(inspect, 'getfullargspec'): - return inspect.getfullargspec(func)[0] - else: - return inspect.getargspec(func)[0] diff --git a/python/paddle/io/__init__.py b/python/paddle/io/__init__.py index 78f792d6a5a6698034912297f5d5a23db0b35201..b67779cb2a2ae699c8206dc717670bf6eb23b25e 100644 --- a/python/paddle/io/__init__.py +++ b/python/paddle/io/__init__.py @@ -18,6 +18,7 @@ __all__ = [ 'IterableDataset', 'TensorDataset', 'BatchSampler', + 'DistributedBatchSampler', # 'Transform', 'DataLoader', 'get_worker_info', @@ -43,7 +44,7 @@ __all__ = [ from ..fluid.io import DataLoader from ..fluid.dataloader import Dataset, IterableDataset, BatchSampler, get_worker_info, \ - TensorDataset, Sampler, SequenceSampler, RandomSampler + TensorDataset, Sampler, SequenceSampler, RandomSampler, DistributedBatchSampler from ..fluid.io import load, save, load_program_state, set_program_state, \ load_inference_model, save_inference_model, batch from ..reader import shuffle, buffered, cache, chain, firstn, compose, map_readers, xmap_readers diff --git a/python/paddle/metric/metrics.py b/python/paddle/metric/metrics.py index 110a62c300559b9037cd2ca735aebd1946ba0ce9..1cd65171ff034e8b834c38184e4452796da985ca 100644 --- a/python/paddle/metric/metrics.py +++ b/python/paddle/metric/metrics.py @@ -202,12 +202,11 @@ class Accuracy(Metric): .. 
code-block:: python import paddle - import paddle.incubate.hapi as hapi paddle.disable_static() - train_dataset = hapi.datasets.MNIST(mode='train') + train_dataset = paddle.vision.datasets.MNIST(mode='train') - model = hapi.Model(hapi.vision.LeNet(classifier_activation=None)) + model = paddle.Model(paddle.vision.LeNet(classifier_activation=None)) optim = paddle.optimizer.Adam( learning_rate=0.001, parameters=model.parameters()) model.prepare( @@ -336,7 +335,6 @@ class Precision(Metric): import paddle import paddle.nn as nn - import paddle.incubate.hapi as hapi class Data(paddle.io.Dataset): def __init__(self): @@ -352,7 +350,7 @@ class Precision(Metric): return self.n paddle.disable_static() - model = hapi.Model(nn.Sequential( + model = paddle.Model(nn.Sequential( nn.Linear(10, 1), nn.Sigmoid() )) @@ -471,7 +469,6 @@ class Recall(Metric): import paddle import paddle.nn as nn - import paddle.incubate.hapi as hapi class Data(paddle.io.Dataset): def __init__(self): @@ -487,7 +484,7 @@ class Recall(Metric): return self.n paddle.disable_static() - model = hapi.Model(nn.Sequential( + model = paddle.Model(nn.Sequential( nn.Linear(10, 1), nn.Sigmoid() )) @@ -617,7 +614,6 @@ class Auc(Metric): import numpy as np import paddle import paddle.nn as nn - import paddle.incubate.hapi as hapi class Data(paddle.io.Dataset): def __init__(self): @@ -633,9 +629,9 @@ class Auc(Metric): return self.n paddle.disable_static() - model = hapi.Model(nn.Sequential( - nn.Linear(10, 2, act='softmax'), - )) + model = paddle.Model(nn.Sequential( + nn.Linear(10, 2), nn.Softmax()) + ) optim = paddle.optimizer.Adam( learning_rate=0.001, parameters=model.parameters()) diff --git a/python/paddle/tests/CMakeLists.txt b/python/paddle/tests/CMakeLists.txt index 79bec8c4ad34d682895250bc29b1fddb3a569bd4..e1bc65a5d15c2883e14d20c5e06c2ee3cd726ea5 100644 --- a/python/paddle/tests/CMakeLists.txt +++ b/python/paddle/tests/CMakeLists.txt @@ -1,6 +1,41 @@ file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") +file(GLOB DIST_TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_dist_*.py") +string(REPLACE ".py" "" DIST_TEST_OPS "${DIST_TEST_OPS}") + +foreach(TEST_OP ${DIST_TEST_OPS}) + list(REMOVE_ITEM TEST_OPS ${TEST_OP}) +endforeach() + foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) endforeach() + +function(py_dist_test TARGET_NAME) + if(WITH_TESTING) + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS ARGS ENVS) + cmake_parse_arguments(py_dist_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(WITH_COVERAGE AND WITH_GPU AND WITH_NCCL AND NOT WIN32) + add_test(NAME ${TARGET_NAME} + COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true + FLAGS_cpu_deterministic=true NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 + PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_dist_test_ENVS} + COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data + ${PYTHON_EXECUTABLE} -u ${py_dist_test_SRCS} ${py_dist_test_ARGS} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + # No unit test should exceed 10 minutes. 
+ set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=DIST") + endif() + + + endif() +endfunction() + +foreach(src ${DIST_TEST_OPS}) + message(STATUS ${src}) + py_dist_test(${src} SRCS ${src}.py) +endforeach() diff --git a/python/paddle/incubate/hapi/tests/dist_hapi_mnist_dynamic.py b/python/paddle/tests/dist_hapi_mnist_dynamic.py similarity index 91% rename from python/paddle/incubate/hapi/tests/dist_hapi_mnist_dynamic.py rename to python/paddle/tests/dist_hapi_mnist_dynamic.py index ede99a50c2fa72da3bd1999204a5fe1e5a656be2..13d966bf38f2aaed35e120aa4d25705cfc36c230 100644 --- a/python/paddle/incubate/hapi/tests/dist_hapi_mnist_dynamic.py +++ b/python/paddle/tests/dist_hapi_mnist_dynamic.py @@ -20,14 +20,15 @@ import unittest import numpy as np import contextlib -from paddle import fluid +import paddle +import paddle.fluid as fluid -from paddle.incubate.hapi import Model, Input, set_device +from paddle import Model, set_device +from paddle.static import InputSpec as Input from paddle.nn.layer.loss import CrossEntropyLoss -from paddle.incubate.hapi.vision.models import LeNet from paddle.metric import Accuracy -from paddle.incubate.hapi.callbacks import ProgBarLogger -from paddle.incubate.hapi.datasets import MNIST +from paddle.vision.models import LeNet +from paddle.vision.datasets import MNIST class MnistDataset(MNIST): @@ -76,7 +77,7 @@ class TestDistTraning(unittest.TestCase): val_dataset = MnistDataset(mode='test') test_dataset = MnistDataset(mode='test', return_label=False) - cbk = ProgBarLogger(50) + cbk = paddle.callbacks.ProgBarLogger(50) model.fit(train_dataset, val_dataset, epochs=2, diff --git a/python/paddle/incubate/hapi/tests/dist_hapi_mnist_static.py b/python/paddle/tests/dist_hapi_mnist_static.py similarity index 91% rename from python/paddle/incubate/hapi/tests/dist_hapi_mnist_static.py rename to python/paddle/tests/dist_hapi_mnist_static.py index 28305fc6a6fd08c160f946920e85391cd444caef..9d8e5f3652c9810579a0b66035a64d1d3b915bff 100644 --- a/python/paddle/incubate/hapi/tests/dist_hapi_mnist_static.py +++ b/python/paddle/tests/dist_hapi_mnist_static.py @@ -20,14 +20,15 @@ import unittest import numpy as np import contextlib -from paddle import fluid +import paddle +import paddle.fluid as fluid -from paddle.incubate.hapi import Model, Input, set_device +from paddle import Model, set_device +from paddle.static import InputSpec as Input from paddle.nn.layer.loss import CrossEntropyLoss -from paddle.incubate.hapi.vision.models import LeNet from paddle.metric import Accuracy -from paddle.incubate.hapi.callbacks import ProgBarLogger -from paddle.incubate.hapi.datasets import MNIST +from paddle.vision.models import LeNet +from paddle.vision.datasets import MNIST class MnistDataset(MNIST): @@ -75,7 +76,7 @@ class TestDistTraning(unittest.TestCase): val_dataset = MnistDataset(mode='test') test_dataset = MnistDataset(mode='test', return_label=False) - cbk = ProgBarLogger(50) + cbk = paddle.callbacks.ProgBarLogger(50) model.fit(train_dataset, val_dataset, epochs=2, diff --git a/python/paddle/incubate/hapi/tests/test_callbacks.py b/python/paddle/tests/test_callbacks.py similarity index 93% rename from python/paddle/incubate/hapi/tests/test_callbacks.py rename to python/paddle/tests/test_callbacks.py index e49bf215c276c8b495b0f991a5821d4c674f48d2..f0d9a132b90eb1c7006fd53557a03376394ee2ab 100644 --- a/python/paddle/incubate/hapi/tests/test_callbacks.py +++ b/python/paddle/tests/test_callbacks.py @@ -18,9 +18,10 @@ import random import tempfile import shutil -from 
paddle.incubate.hapi.model import Model, Input -from paddle.incubate.hapi.vision.models import LeNet -from paddle.incubate.hapi.callbacks import config_callbacks +from paddle import Model +from paddle.static import InputSpec +from paddle.vision.models import LeNet +from paddle.hapi.callbacks import config_callbacks class TestCallbacks(unittest.TestCase): @@ -36,7 +37,7 @@ class TestCallbacks(unittest.TestCase): freq = 2 eval_steps = 20 - inputs = [Input([None, 1, 28, 28], 'float32', 'image')] + inputs = [InputSpec([None, 1, 28, 28], 'float32', 'image')] lenet = Model(LeNet(), inputs) lenet.prepare() diff --git a/python/paddle/incubate/hapi/tests/test_dataset_cifar.py b/python/paddle/tests/test_dataset_cifar.py similarity index 93% rename from python/paddle/incubate/hapi/tests/test_dataset_cifar.py rename to python/paddle/tests/test_dataset_cifar.py index 08d9f4353c0ed639f5ad907c921bf7b2c88271f5..2ecc41c3f0a81a56cc34e826483ea4f5cc6681d9 100644 --- a/python/paddle/incubate/hapi/tests/test_dataset_cifar.py +++ b/python/paddle/tests/test_dataset_cifar.py @@ -13,14 +13,9 @@ # limitations under the License. import unittest -import os import numpy as np -import tempfile -import shutil -import cv2 -from paddle.incubate.hapi.datasets import * -from paddle.incubate.hapi.datasets.utils import _check_exists_and_download +from paddle.vision.datasets import * class TestCifar10Train(unittest.TestCase): diff --git a/python/paddle/incubate/hapi/tests/test_dataset_conll05.py b/python/paddle/tests/test_dataset_conll05.py similarity index 87% rename from python/paddle/incubate/hapi/tests/test_dataset_conll05.py rename to python/paddle/tests/test_dataset_conll05.py index 0ed2a4180d0cb341f5d57bdf1cb9d8ef145a44fb..e35c04275d20478336da76c9ba47c98960a9ea24 100644 --- a/python/paddle/incubate/hapi/tests/test_dataset_conll05.py +++ b/python/paddle/tests/test_dataset_conll05.py @@ -12,15 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import os +import unittest import numpy as np -import tempfile -import shutil -import cv2 -from paddle.incubate.hapi.datasets import * -from paddle.incubate.hapi.datasets.utils import _check_exists_and_download +from paddle.text.datasets import * class TestConll05st(unittest.TestCase): @@ -36,6 +32,8 @@ class TestConll05st(unittest.TestCase): for s in sample: self.assertTrue(len(s.shape) == 1) + assert os.path.exists(conll05st.get_embedding()) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/incubate/hapi/tests/test_dataset_imdb.py b/python/paddle/tests/test_dataset_imdb.py similarity index 90% rename from python/paddle/incubate/hapi/tests/test_dataset_imdb.py rename to python/paddle/tests/test_dataset_imdb.py index cef73634b6b5fb114fa88b785bb77a87fe129bd5..62c75ab232c8db10f99257fdae17191f94726b61 100644 --- a/python/paddle/incubate/hapi/tests/test_dataset_imdb.py +++ b/python/paddle/tests/test_dataset_imdb.py @@ -13,14 +13,9 @@ # limitations under the License. 
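Across the renamed suites, hapi's `Input` gives way to `paddle.static.InputSpec` and networks are wrapped with `paddle.Model` directly. A minimal sketch of the new pattern, using the same shapes, names, and calls as the hunks above:

.. code-block:: python

    import numpy as np
    import paddle
    from paddle.static import InputSpec
    from paddle.vision.models import LeNet

    paddle.disable_static()
    # InputSpec carries the same (shape, dtype, name) triple the old Input did
    inputs = [InputSpec([None, 1, 28, 28], 'float32', 'image')]
    model = paddle.Model(LeNet(), inputs)
    model.prepare()
    out, = model.test_batch([np.random.rand(2, 1, 28, 28).astype('float32')])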
import unittest -import os import numpy as np -import tempfile -import shutil -import cv2 -from paddle.incubate.hapi.datasets import * -from paddle.incubate.hapi.datasets.utils import _check_exists_and_download +from paddle.text.datasets import * class TestImdbTrain(unittest.TestCase): diff --git a/python/paddle/incubate/hapi/tests/test_dataset_imikolov.py b/python/paddle/tests/test_dataset_imikolov.py similarity index 89% rename from python/paddle/incubate/hapi/tests/test_dataset_imikolov.py rename to python/paddle/tests/test_dataset_imikolov.py index f3d97d314acbf7f55a8482fd386581fef7f16e03..f4f0b8e48367725abb4ebe1fe5b0598ed6e749f1 100644 --- a/python/paddle/incubate/hapi/tests/test_dataset_imikolov.py +++ b/python/paddle/tests/test_dataset_imikolov.py @@ -13,14 +13,9 @@ # limitations under the License. import unittest -import os import numpy as np -import tempfile -import shutil -import cv2 -from paddle.incubate.hapi.datasets import * -from paddle.incubate.hapi.datasets.utils import _check_exists_and_download +from paddle.text.datasets import * class TestImikolovTrain(unittest.TestCase): diff --git a/python/paddle/incubate/hapi/tests/test_dataset_movie_reviews.py b/python/paddle/tests/test_dataset_movie_reviews.py similarity index 90% rename from python/paddle/incubate/hapi/tests/test_dataset_movie_reviews.py rename to python/paddle/tests/test_dataset_movie_reviews.py index ae8a7a3035ee0e86f8ee2fa9e8a23f6036758d2d..e6e6667013f89aca305f82a744c00de2af818736 100644 --- a/python/paddle/incubate/hapi/tests/test_dataset_movie_reviews.py +++ b/python/paddle/tests/test_dataset_movie_reviews.py @@ -13,14 +13,9 @@ # limitations under the License. import unittest -import os import numpy as np -import tempfile -import shutil -import cv2 -from paddle.incubate.hapi.datasets import * -from paddle.incubate.hapi.datasets.utils import _check_exists_and_download +from paddle.text.datasets import * class TestMovieReviewsTrain(unittest.TestCase): diff --git a/python/paddle/incubate/hapi/tests/test_dataset_movielens.py b/python/paddle/tests/test_dataset_movielens.py similarity index 91% rename from python/paddle/incubate/hapi/tests/test_dataset_movielens.py rename to python/paddle/tests/test_dataset_movielens.py index f94269f930e05e04b3bdfc4324e5ae1ea15b1fb9..3b61fd6f5c7c22bca5114579fdafe46405f77118 100644 --- a/python/paddle/incubate/hapi/tests/test_dataset_movielens.py +++ b/python/paddle/tests/test_dataset_movielens.py @@ -13,14 +13,9 @@ # limitations under the License. 
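The dataset suites in this stretch now import the text corpora from the public `paddle.text.datasets` namespace instead of `paddle.incubate.hapi.datasets`. A minimal usage sketch (the first access downloads the corpus into the dataset cache):

.. code-block:: python

    from paddle.text.datasets import Imdb

    imdb = Imdb(mode='test')
    doc, label = imdb[0]  # doc is a token-id sequence, label the sentiment
    print(len(doc), label)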
import unittest -import os import numpy as np -import tempfile -import shutil -import cv2 -from paddle.incubate.hapi.datasets import * -from paddle.incubate.hapi.datasets.utils import _check_exists_and_download +from paddle.text.datasets import * class TestMovielensTrain(unittest.TestCase): diff --git a/python/paddle/incubate/hapi/tests/test_dataset_uci_housing.py b/python/paddle/tests/test_dataset_uci_housing.py similarity index 96% rename from python/paddle/incubate/hapi/tests/test_dataset_uci_housing.py rename to python/paddle/tests/test_dataset_uci_housing.py index 768367bff9911a352ea6b13f279d5b71938bc85b..623c7d24d09da7501edd6a8d86e60fc3b772d086 100644 --- a/python/paddle/incubate/hapi/tests/test_dataset_uci_housing.py +++ b/python/paddle/tests/test_dataset_uci_housing.py @@ -19,8 +19,7 @@ import tempfile import shutil import cv2 -from paddle.incubate.hapi.datasets import * -from paddle.incubate.hapi.datasets.utils import _check_exists_and_download +from paddle.text.datasets import * class TestUCIHousingTrain(unittest.TestCase): diff --git a/python/paddle/incubate/hapi/tests/test_dataset_voc.py b/python/paddle/tests/test_dataset_voc.py similarity index 92% rename from python/paddle/incubate/hapi/tests/test_dataset_voc.py rename to python/paddle/tests/test_dataset_voc.py index 85766ab8e30a3a7abd5e2966e6353b116c03e926..d45df419b1283a40b46252bee2d37a9e2fdaadb9 100644 --- a/python/paddle/incubate/hapi/tests/test_dataset_voc.py +++ b/python/paddle/tests/test_dataset_voc.py @@ -15,12 +15,8 @@ import unittest import os import numpy as np -import tempfile -import shutil -import cv2 -from paddle.incubate.hapi.datasets import voc2012, VOC2012 -from paddle.incubate.hapi.datasets.utils import _check_exists_and_download +from paddle.vision.datasets import voc2012, VOC2012 # VOC2012 is too large for unittest to download, stub a small dataset here voc2012.VOC_URL = 'https://paddlemodels.bj.bcebos.com/voc2012_stub/VOCtrainval_11-May-2012.tar' diff --git a/python/paddle/incubate/hapi/tests/test_dataset_wmt.py b/python/paddle/tests/test_dataset_wmt.py similarity index 95% rename from python/paddle/incubate/hapi/tests/test_dataset_wmt.py rename to python/paddle/tests/test_dataset_wmt.py index 987e55676aadb77582c58b13e626d7258f3c75b5..b4945cb90f991e907812129f3918ef0137565244 100644 --- a/python/paddle/incubate/hapi/tests/test_dataset_wmt.py +++ b/python/paddle/tests/test_dataset_wmt.py @@ -13,14 +13,9 @@ # limitations under the License. 
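These tests also stop importing `_check_exists_and_download` from the old `datasets.utils` module; the helper now lives in `paddle.dataset.common` with the same signature, as the `conll05` hunk later in this patch shows. A sketch of its contract, with a hypothetical URL and checksum standing in for real values:

.. code-block:: python

    from paddle.dataset.common import _check_exists_and_download

    DATA_URL = 'https://example.com/data.tar.gz'   # hypothetical
    DATA_MD5 = 'd41d8cd98f00b204e9800998ecf8427e'  # hypothetical
    # returns `path` untouched when it already exists; otherwise downloads
    # DATA_URL (md5-verified) into the 'my_dataset' cache, or raises
    # ValueError when download is False
    path = _check_exists_and_download(None, DATA_URL, DATA_MD5, 'my_dataset', True)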
import unittest -import os import numpy as np -import tempfile -import shutil -import cv2 -from paddle.incubate.hapi.datasets import * -from paddle.incubate.hapi.datasets.utils import _check_exists_and_download +from paddle.text.datasets import * class TestWMT14Train(unittest.TestCase): diff --git a/python/paddle/incubate/hapi/tests/test_datasets.py b/python/paddle/tests/test_datasets.py similarity index 97% rename from python/paddle/incubate/hapi/tests/test_datasets.py rename to python/paddle/tests/test_datasets.py index 7f544e5ad84d5aa2041e8fdb6c1ac77cc34d8164..1e50ff60aa5c3039c21d6e1e3a714c32000462c7 100644 --- a/python/paddle/incubate/hapi/tests/test_datasets.py +++ b/python/paddle/tests/test_datasets.py @@ -19,8 +19,8 @@ import tempfile import shutil import cv2 -from paddle.incubate.hapi.datasets import * -from paddle.incubate.hapi.datasets.utils import _check_exists_and_download +from paddle.vision.datasets import * +from paddle.dataset.common import _check_exists_and_download class TestFolderDatasets(unittest.TestCase): diff --git a/python/paddle/incubate/hapi/tests/test_dist_hapi_model.py b/python/paddle/tests/test_dist_hapi_model.py similarity index 100% rename from python/paddle/incubate/hapi/tests/test_dist_hapi_model.py rename to python/paddle/tests/test_dist_hapi_model.py diff --git a/python/paddle/incubate/hapi/tests/test_download.py b/python/paddle/tests/test_download.py similarity index 97% rename from python/paddle/incubate/hapi/tests/test_download.py rename to python/paddle/tests/test_download.py index e8bd8306daf651dfbe96881424a02d4ffdb2a9e6..6fb53573c21a1589e474e337d058294c09f65f38 100644 --- a/python/paddle/incubate/hapi/tests/test_download.py +++ b/python/paddle/tests/test_download.py @@ -14,7 +14,7 @@ import unittest -from paddle.incubate.hapi.download import get_weights_path_from_url +from paddle.utils.download import get_weights_path_from_url class TestDownload(unittest.TestCase): diff --git a/python/paddle/incubate/hapi/tests/test_logger.py b/python/paddle/tests/test_logger.py similarity index 96% rename from python/paddle/incubate/hapi/tests/test_logger.py rename to python/paddle/tests/test_logger.py index f25d0ee4f7e2f0db1031f1f2884fb6df338003cc..b6edec8674a64fb7ce41f4e60d8d6b8822c514e3 100644 --- a/python/paddle/incubate/hapi/tests/test_logger.py +++ b/python/paddle/tests/test_logger.py @@ -21,7 +21,7 @@ import numpy as np import shutil import tempfile -from paddle.incubate.hapi.logger import setup_logger +from paddle.hapi.logger import setup_logger class TestSetupLogger(unittest.TestCase): diff --git a/python/paddle/tests/test_metrics.py b/python/paddle/tests/test_metrics.py index 2272a81b3f602ec46972c9d4620ded9680e2ff5f..f05cdf9c6da10bdcb68739e7018933d0ebe006dc 100644 --- a/python/paddle/tests/test_metrics.py +++ b/python/paddle/tests/test_metrics.py @@ -22,7 +22,7 @@ import numpy as np import paddle import paddle.fluid as fluid -from paddle.incubate.hapi.utils import to_list +from paddle.hapi.model import to_list def accuracy(pred, label, topk=(1, )): diff --git a/python/paddle/incubate/hapi/tests/test_model.py b/python/paddle/tests/test_model.py similarity index 90% rename from python/paddle/incubate/hapi/tests/test_model.py rename to python/paddle/tests/test_model.py index 7fc471aa1e2eeb80ae81d4a32b09eeff74193e6f..7b79b25cbc3e98b802bad87386ad0572ec6ab8d7 100644 --- a/python/paddle/incubate/hapi/tests/test_model.py +++ b/python/paddle/tests/test_model.py @@ -24,21 +24,22 @@ import tempfile import paddle from paddle import fluid +from paddle import to_tensor 
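The `to_tensor` import added just above replaces `fluid.dygraph.base.to_variable` throughout the updated test bodies; both wrap a numpy array as an imperative tensor:

.. code-block:: python

    import numpy as np
    import paddle

    paddle.disable_static()
    x = paddle.to_tensor(np.zeros((2, 20), dtype='float32'))  # was: to_variable(...)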
from paddle.nn import Conv2d, Pool2D, Linear, ReLU, Sequential, Softmax -from paddle.fluid.dygraph.base import to_variable -import paddle.incubate.hapi as hapi -from paddle.incubate.hapi import Model, Input +from paddle import Model +from paddle.static import InputSpec from paddle.nn.layer.loss import CrossEntropyLoss from paddle.metric import Accuracy -from paddle.incubate.hapi.datasets import MNIST -from paddle.incubate.hapi.vision.models import LeNet -from paddle.incubate.hapi.distributed import DistributedBatchSampler, prepare_distributed_context +from paddle.vision.datasets import MNIST +from paddle.vision.models import LeNet +from paddle.io import DistributedBatchSampler +from paddle.hapi.model import prepare_distributed_context from paddle.fluid.dygraph.jit import declarative from paddle.fluid.dygraph.dygraph_to_static.program_translator import ProgramTranslator -class LeNetDygraph(fluid.dygraph.Layer): +class LeNetDygraph(paddle.nn.Layer): def __init__(self, num_classes=10, classifier_activation=None): super(LeNetDygraph, self).__init__() self.num_classes = num_classes @@ -154,7 +155,7 @@ class TestModel(unittest.TestCase): def setUpClass(cls): if not fluid.is_compiled_with_cuda(): self.skipTest('module not tested when ONLY_CPU compling') - cls.device = hapi.set_device('gpu') + cls.device = paddle.set_device('gpu') fluid.enable_dygraph(cls.device) sp_num = 1280 @@ -180,8 +181,8 @@ class TestModel(unittest.TestCase): cls.acc1 = dynamic_evaluate(dy_lenet, cls.val_loader) - cls.inputs = [Input([-1, 1, 28, 28], 'float32', 'image')] - cls.labels = [Input([None, 1], 'int64', 'label')] + cls.inputs = [InputSpec([-1, 1, 28, 28], 'float32', 'image')] + cls.labels = [InputSpec([None, 1], 'int64', 'label')] cls.save_dir = tempfile.mkdtemp() cls.weight_path = os.path.join(cls.save_dir, 'lenet') @@ -314,7 +315,7 @@ class TestModel(unittest.TestCase): fluid.disable_dygraph() if dynamic else None -class MyModel(fluid.dygraph.Layer): +class MyModel(paddle.nn.Layer): def __init__(self, classifier_activation='softmax'): super(MyModel, self).__init__() self._fc = Linear(20, 10) @@ -343,8 +344,8 @@ class TestModelFunction(unittest.TestCase): optim = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=m.parameters()) m.train() - output = m(to_variable(data)) - loss = CrossEntropyLoss(reduction='sum')(output, to_variable(label)) + output = m(to_tensor(data)) + loss = CrossEntropyLoss(reduction='sum')(output, to_tensor(label)) avg_loss = fluid.layers.reduce_sum(loss) avg_loss.backward() optim.minimize(avg_loss) @@ -354,7 +355,7 @@ class TestModelFunction(unittest.TestCase): ref = get_expect() for dynamic in [True, False]: - device = hapi.set_device('cpu') + device = paddle.set_device('cpu') fluid.enable_dygraph(device) if dynamic else None self.set_seed() @@ -362,8 +363,8 @@ class TestModelFunction(unittest.TestCase): optim2 = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=net.parameters()) - inputs = [Input([None, dim], 'float32', 'x')] - labels = [Input([None, 1], 'int64', 'label')] + inputs = [InputSpec([None, dim], 'float32', 'x')] + labels = [InputSpec([None, 1], 'int64', 'label')] model = Model(net, inputs, labels) model.prepare(optim2, loss=CrossEntropyLoss(reduction="sum")) loss, = model.train_batch([data], [label]) @@ -379,17 +380,17 @@ class TestModelFunction(unittest.TestCase): self.set_seed() m = MyModel() m.eval() - output = m(to_variable(data)) + output = m(to_tensor(data)) fluid.disable_dygraph() return output.numpy() ref = get_expect() for dynamic in [True, False]: - device = 
hapi.set_device('cpu') + device = paddle.set_device('cpu') fluid.enable_dygraph(device) if dynamic else None self.set_seed() net = MyModel() - inputs = [Input([None, dim], 'float32', 'x')] + inputs = [InputSpec([None, dim], 'float32', 'x')] model = Model(net, inputs) model.prepare() out, = model.test_batch([data]) @@ -400,11 +401,11 @@ class TestModelFunction(unittest.TestCase): def test_save_load(self): path = tempfile.mkdtemp() for dynamic in [True, False]: - device = hapi.set_device('cpu') + device = paddle.set_device('cpu') fluid.enable_dygraph(device) if dynamic else None net = MyModel(classifier_activation=None) - inputs = [Input([None, 20], 'float32', 'x')] - labels = [Input([None, 1], 'int64', 'label')] + inputs = [InputSpec([None, 20], 'float32', 'x')] + labels = [InputSpec([None, 1], 'int64', 'label')] optim = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=net.parameters()) model = Model(net, inputs, labels) @@ -418,7 +419,7 @@ class TestModelFunction(unittest.TestCase): def test_dynamic_save_static_load(self): path = tempfile.mkdtemp() # dynamic saving - device = hapi.set_device('cpu') + device = paddle.set_device('cpu') fluid.enable_dygraph(device) model = Model(MyModel(classifier_activation=None)) optim = fluid.optimizer.SGD(learning_rate=0.001, @@ -427,8 +428,8 @@ class TestModelFunction(unittest.TestCase): model.save(path + '/test') fluid.disable_dygraph() - inputs = [Input([None, 20], 'float32', 'x')] - labels = [Input([None, 1], 'int64', 'label')] + inputs = [InputSpec([None, 20], 'float32', 'x')] + labels = [InputSpec([None, 1], 'int64', 'label')] model = Model(MyModel(classifier_activation=None), inputs, labels) optim = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=model.parameters()) @@ -440,20 +441,20 @@ class TestModelFunction(unittest.TestCase): path = tempfile.mkdtemp() net = MyModel(classifier_activation=None) - inputs = [Input([None, 20], 'float32', 'x')] - labels = [Input([None, 1], 'int64', 'label')] + inputs = [InputSpec([None, 20], 'float32', 'x')] + labels = [InputSpec([None, 1], 'int64', 'label')] optim = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=net.parameters()) model = Model(net, inputs, labels) model.prepare(optimizer=optim, loss=CrossEntropyLoss(reduction="sum")) model.save(path + '/test') - device = hapi.set_device('cpu') + device = paddle.set_device('cpu') fluid.enable_dygraph(device) #if dynamic else None net = MyModel(classifier_activation=None) - inputs = [Input([None, 20], 'float32', 'x')] - labels = [Input([None, 1], 'int64', 'label')] + inputs = [InputSpec([None, 20], 'float32', 'x')] + labels = [InputSpec([None, 1], 'int64', 'label')] optim = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=net.parameters()) model = Model(net, inputs, labels) @@ -464,10 +465,10 @@ class TestModelFunction(unittest.TestCase): def test_parameters(self): for dynamic in [True, False]: - device = hapi.set_device('cpu') + device = paddle.set_device('cpu') fluid.enable_dygraph(device) if dynamic else None net = MyModel() - inputs = [Input([None, 20], 'float32', 'x')] + inputs = [InputSpec([None, 20], 'float32', 'x')] model = Model(net, inputs) model.prepare() params = model.parameters() @@ -482,7 +483,7 @@ class TestModelFunction(unittest.TestCase): prog_translator = ProgramTranslator() prog_translator.enable(False) if not dynamic else None net = LeNetDeclarative() - inputs = [Input([None, 1, 28, 28], 'float32', 'x')] + inputs = [InputSpec([None, 1, 28, 28], 'float32', 'x')] model = Model(net, inputs) model.prepare() save_dir = 
tempfile.mkdtemp() @@ -514,8 +515,8 @@ class TestRaiseError(unittest.TestCase): def test_input_without_name(self): net = MyModel(classifier_activation=None) - inputs = [Input([None, 10], 'float32')] - labels = [Input([None, 1], 'int64', 'label')] + inputs = [InputSpec([None, 10], 'float32')] + labels = [InputSpec([None, 1], 'int64', 'label')] with self.assertRaises(ValueError): model = Model(net, inputs, labels) diff --git a/python/paddle/incubate/hapi/tests/test_pretrained_model.py b/python/paddle/tests/test_pretrained_model.py similarity index 82% rename from python/paddle/incubate/hapi/tests/test_pretrained_model.py rename to python/paddle/tests/test_pretrained_model.py index 334ebff449d4f34c9a5a9b56ee7998b4dbc5abf0..641147d39e94f7c2bbb426900ed484546bad49c6 100644 --- a/python/paddle/incubate/hapi/tests/test_pretrained_model.py +++ b/python/paddle/tests/test_pretrained_model.py @@ -15,9 +15,9 @@ import unittest import numpy as np -import paddle.fluid as fluid -import paddle.incubate.hapi.vision.models as models -from paddle.incubate.hapi import Model, Input +import paddle +from paddle.static import InputSpec +import paddle.vision.models as models # test the predicted resutls of static graph and dynamic graph are equal @@ -25,16 +25,16 @@ from paddle.incubate.hapi import Model, Input class TestPretrainedModel(unittest.TestCase): def infer(self, x, arch, dygraph=True): if dygraph: - fluid.enable_dygraph() + paddle.disable_static() net = models.__dict__[arch](pretrained=True, classifier_activation=None) - inputs = [Input([None, 3, 224, 224], 'float32', 'image')] - model = Model(network=net, inputs=inputs) + inputs = [InputSpec([None, 3, 224, 224], 'float32', 'image')] + model = paddle.Model(network=net, inputs=inputs) model.prepare() res = model.test_batch(x) if dygraph: - fluid.disable_dygraph() + paddle.enable_static() return res def test_models(self): diff --git a/python/paddle/incubate/hapi/tests/test_progressbar.py b/python/paddle/tests/test_progressbar.py similarity index 97% rename from python/paddle/incubate/hapi/tests/test_progressbar.py rename to python/paddle/tests/test_progressbar.py index ff315ef505606aaf45b46a722de8f0386ae2d5ed..4726522918238a2f88b73edbdebb3dea6fbe1281 100644 --- a/python/paddle/incubate/hapi/tests/test_progressbar.py +++ b/python/paddle/tests/test_progressbar.py @@ -17,7 +17,7 @@ import unittest import random import time -from paddle.incubate.hapi.progressbar import ProgressBar +from paddle.hapi.progressbar import ProgressBar class TestProgressBar(unittest.TestCase): diff --git a/python/paddle/incubate/hapi/tests/test_text.py b/python/paddle/tests/test_text.py similarity index 99% rename from python/paddle/incubate/hapi/tests/test_text.py rename to python/paddle/tests/test_text.py index c4fef0d749ce788e50d8cffdf9b7041e33d078af..43968896c18bda6445de46773899128e1bedff53 100644 --- a/python/paddle/incubate/hapi/tests/test_text.py +++ b/python/paddle/tests/test_text.py @@ -24,8 +24,9 @@ import paddle import paddle.fluid as fluid from paddle.fluid.dygraph import Embedding, Linear, Layer from paddle.fluid.layers import BeamSearchDecoder -from paddle.incubate.hapi import Model, Input, set_device -from paddle.incubate.hapi.text import * +from paddle import Model, set_device +from paddle.static import InputSpec as Input +from paddle.text import * class ModuleApiTest(unittest.TestCase): diff --git a/python/paddle/incubate/hapi/tests/test_transforms.py b/python/paddle/tests/test_transforms.py similarity index 96% rename from 
python/paddle/incubate/hapi/tests/test_transforms.py rename to python/paddle/tests/test_transforms.py index 84208fda1e947f343de52a0a3c8de68322672013..6c2944d1e750faa9e9ed8d3c2b832b8aff59b954 100644 --- a/python/paddle/incubate/hapi/tests/test_transforms.py +++ b/python/paddle/tests/test_transforms.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -# when test, you should add hapi root path to the PYTHONPATH, -# export PYTHONPATH=PATH_TO_HAPI:$PYTHONPATH import unittest import os import tempfile @@ -21,9 +19,9 @@ import cv2 import shutil import numpy as np -from paddle.incubate.hapi.datasets import DatasetFolder -from paddle.incubate.hapi.vision.transforms import transforms -import paddle.incubate.hapi.vision.transforms.functional as F +from paddle.vision.datasets import DatasetFolder +from paddle.vision.transforms import transforms +import paddle.vision.transforms.functional as F class TestTransforms(unittest.TestCase): diff --git a/python/paddle/incubate/hapi/tests/test_vision_models.py b/python/paddle/tests/test_vision_models.py similarity index 86% rename from python/paddle/incubate/hapi/tests/test_vision_models.py rename to python/paddle/tests/test_vision_models.py index 2dc9355bcc3005d48b7046123b024fa2a91594c3..44f9ab5390122f086af4168e225fe2b5a2d8a9b2 100644 --- a/python/paddle/incubate/hapi/tests/test_vision_models.py +++ b/python/paddle/tests/test_vision_models.py @@ -15,8 +15,9 @@ import unittest import numpy as np -import paddle.incubate.hapi.vision.models as models -import paddle.incubate.hapi as hapi +import paddle +from paddle.static import InputSpec +import paddle.vision.models as models class TestVisonModels(unittest.TestCase): @@ -28,8 +29,8 @@ class TestVisonModels(unittest.TestCase): else: net = models.__dict__[arch](pretrained=pretrained) - input = hapi.Input([None, 3, 224, 224], 'float32', 'image') - model = hapi.Model(net, input) + input = InputSpec([None, 3, 224, 224], 'float32', 'image') + model = paddle.Model(net, input) model.prepare() model.test_batch(x) @@ -71,8 +72,8 @@ class TestVisonModels(unittest.TestCase): self.models_infer('resnet152') def test_lenet(self): - input = hapi.Input([None, 1, 28, 28], 'float32', 'x') - lenet = hapi.Model(models.__dict__['LeNet'](), input) + input = InputSpec([None, 1, 28, 28], 'float32', 'x') + lenet = paddle.Model(models.__dict__['LeNet'](), input) lenet.prepare() x = np.array(np.random.random((2, 1, 28, 28)), dtype=np.float32) diff --git a/python/paddle/incubate/hapi/text/__init__.py b/python/paddle/text/__init__.py similarity index 86% rename from python/paddle/incubate/hapi/text/__init__.py rename to python/paddle/text/__init__.py index 7caab7071c9977e2ea1148e415cd51c33bfd1de0..083bfbd1d2528eceb070f32e5cc502382e4d6ea4 100644 --- a/python/paddle/incubate/hapi/text/__init__.py +++ b/python/paddle/text/__init__.py @@ -15,4 +15,8 @@ from . import text from .text import * -__all__ = text.__all__ +from . 
import datasets +from .datasets import * + +__all__ = text.__all__ \ + + datasets.__all__ diff --git a/python/paddle/incubate/hapi/datasets/__init__.py b/python/paddle/text/datasets/__init__.py similarity index 75% rename from python/paddle/incubate/hapi/datasets/__init__.py rename to python/paddle/text/datasets/__init__.py index a88b0e6bbf1975d97bfeb68025b978ce877c6baf..b5cea40a4f4924fee7a76bad6030a21fa5a61268 100644 --- a/python/paddle/incubate/hapi/datasets/__init__.py +++ b/python/paddle/text/datasets/__init__.py @@ -12,11 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from . import folder -from . import mnist -from . import flowers -from . import cifar -from . import voc2012 from . import conll05 from . import imdb from . import imikolov @@ -26,11 +21,6 @@ from . import uci_housing from . import wmt14 from . import wmt16 -from .folder import * -from .mnist import * -from .flowers import * -from .cifar import * -from .voc2012 import * from .conll05 import * from .imdb import * from .imikolov import * @@ -40,12 +30,7 @@ from .uci_housing import * from .wmt14 import * from .wmt16 import * -__all__ = folder.__all__ \ - + mnist.__all__ \ - + flowers.__all__ \ - + cifar.__all__ \ - + voc2012.__all__ \ - + conll05.__all__ \ +__all__ = conll05.__all__ \ + imdb.__all__ \ + imikolov.__all__ \ + movielens.__all__ \ diff --git a/python/paddle/incubate/hapi/datasets/conll05.py b/python/paddle/text/datasets/conll05.py similarity index 86% rename from python/paddle/incubate/hapi/datasets/conll05.py rename to python/paddle/text/datasets/conll05.py index 094e3559335363524c4ae893f70294a4afaa7037..8dd6db656ebe4ad08db301209e0dfe19fa1cf895 100644 --- a/python/paddle/incubate/hapi/datasets/conll05.py +++ b/python/paddle/text/datasets/conll05.py @@ -22,7 +22,7 @@ from six.moves import cPickle as pickle from paddle.io import Dataset import paddle.compat as cpt -from .utils import _check_exists_and_download +from paddle.dataset.common import _check_exists_and_download __all__ = ['Conll05st'] @@ -71,29 +71,29 @@ class Conll05st(Dataset): .. 
code-block:: python - import paddle - from paddle.incubate.hapi.datasets import Conll05st + import paddle + from paddle.text.datasets import Conll05st - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super(SimpleNet, self).__init__() + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() - def forward(self, pred_idx, mark, label): - return paddle.sum(pred_idx), paddle.sum(mark), paddle.sum(label) + def forward(self, pred_idx, mark, label): + return paddle.sum(pred_idx), paddle.sum(mark), paddle.sum(label) - paddle.disable_static() + paddle.disable_static() - conll05st = Conll05st() + conll05st = Conll05st() - for i in range(10): - pred_idx, mark, label= conll05st[i][-3:] - pred_idx = paddle.to_tensor(pred_idx) - mark = paddle.to_tensor(mark) - label = paddle.to_tensor(label) + for i in range(10): + pred_idx, mark, label= conll05st[i][-3:] + pred_idx = paddle.to_tensor(pred_idx) + mark = paddle.to_tensor(mark) + label = paddle.to_tensor(label) - model = SimpleNet() - pred_idx, mark, label= model(pred_idx, mark, label) - print(pred_idx.numpy(), mark.numpy(), label.numpy()) + model = SimpleNet() + pred_idx, mark, label= model(pred_idx, mark, label) + print(pred_idx.numpy(), mark.numpy(), label.numpy()) """ @@ -131,6 +131,12 @@ class Conll05st(Dataset): target_dict_file, TRGDICT_URL, TRGDICT_MD5, 'conll05st', download) + self.emb_file = emb_file + if self.emb_file is None: + assert download, "emb_file is not set and downloading automatically is disabled" + self.emb_file = _check_exists_and_download( + emb_file, EMB_URL, EMB_MD5, 'conll05st', download) + self.word_dict = self._load_dict(self.word_dict_file) self.predicate_dict = self._load_dict(self.verb_dict_file) self.label_dict = self._load_label_dict(self.target_dict_file) @@ -290,8 +296,27 @@ class Conll05st(Dataset): def get_dict(self): """ Get the word, verb and label dictionary of Wikipedia corpus. + + Examples: + + .. code-block:: python + + from paddle.text.datasets import Conll05st + conll05st = Conll05st() + word_dict, predicate_dict, label_dict = conll05st.get_dict() """ return self.word_dict, self.predicate_dict, self.label_dict def get_embedding(self): + """ + Get the embedding dictionary file. + + Examples: + + .. code-block:: python + + from paddle.text.datasets import Conll05st + conll05st = Conll05st() + emb_file = conll05st.get_embedding() + """ return self.emb_file diff --git a/python/paddle/incubate/hapi/datasets/imdb.py b/python/paddle/text/datasets/imdb.py similarity index 84% rename from python/paddle/incubate/hapi/datasets/imdb.py rename to python/paddle/text/datasets/imdb.py index 12d166bc784a382ac5ae70491d3e8061ad1d1e9f..f1bf247efcaf7591fe8062976d6329898ee15258 100644 --- a/python/paddle/incubate/hapi/datasets/imdb.py +++ b/python/paddle/text/datasets/imdb.py @@ -22,7 +22,7 @@ import numpy as np import collections from paddle.io import Dataset -from .utils import _check_exists_and_download +from paddle.dataset.common import _check_exists_and_download __all__ = ['Imdb'] @@ -49,28 +49,28 @@ class Imdb(Dataset): .. 
code-block:: python - import paddle - from paddle.incubate.hapi.datasets import Imdb + import paddle + from paddle.text.datasets import Imdb - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super(SimpleNet, self).__init__() + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() - def forward(self, doc, label): - return paddle.sum(doc), label + def forward(self, doc, label): + return paddle.sum(doc), label - paddle.disable_static() + paddle.disable_static() - imdb = Imdb(mode='train') + imdb = Imdb(mode='train') - for i in range(10): - doc, label = imdb[i] - doc = paddle.to_tensor(doc) - label = paddle.to_tensor(label) + for i in range(10): + doc, label = imdb[i] + doc = paddle.to_tensor(doc) + label = paddle.to_tensor(label) - model = SimpleNet() - image, label = model(doc, label) - print(doc.numpy().shape, label.numpy().shape) + model = SimpleNet() + image, label = model(doc, label) + print(doc.numpy().shape, label.numpy().shape) """ diff --git a/python/paddle/incubate/hapi/datasets/imikolov.py b/python/paddle/text/datasets/imikolov.py similarity index 87% rename from python/paddle/incubate/hapi/datasets/imikolov.py rename to python/paddle/text/datasets/imikolov.py index 2e6ad43b506265ee8c9c8617a87eba5a041632bd..cfd437021b953942535a880e4ce6ee41edb932d6 100644 --- a/python/paddle/incubate/hapi/datasets/imikolov.py +++ b/python/paddle/text/datasets/imikolov.py @@ -20,7 +20,7 @@ import numpy as np import collections from paddle.io import Dataset -from .utils import _check_exists_and_download +from paddle.dataset.common import _check_exists_and_download __all__ = ['Imikolov'] @@ -49,28 +49,28 @@ class Imikolov(Dataset): .. code-block:: python - import paddle - from paddle.incubate.hapi.datasets import Imikolov + import paddle + from paddle.text.datasets import Imikolov - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super(SimpleNet, self).__init__() + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() - def forward(self, src, trg): - return paddle.sum(src), paddle.sum(trg) + def forward(self, src, trg): + return paddle.sum(src), paddle.sum(trg) - paddle.disable_static() + paddle.disable_static() - imikolov = Imikolov(mode='train', data_type='SEQ', window_size=2) + imikolov = Imikolov(mode='train', data_type='SEQ', window_size=2) - for i in range(10): - src, trg = imikolov[i] - src = paddle.to_tensor(src) - trg = paddle.to_tensor(trg) + for i in range(10): + src, trg = imikolov[i] + src = paddle.to_tensor(src) + trg = paddle.to_tensor(trg) - model = SimpleNet() - src, trg = model(src, trg) - print(src.numpy().shape, trg.numpy().shape) + model = SimpleNet() + src, trg = model(src, trg) + print(src.numpy().shape, trg.numpy().shape) """ diff --git a/python/paddle/incubate/hapi/datasets/movie_reviews.py b/python/paddle/text/datasets/movie_reviews.py similarity index 80% rename from python/paddle/incubate/hapi/datasets/movie_reviews.py rename to python/paddle/text/datasets/movie_reviews.py index 7bf0684ebcd315807b9dc736c5481383073e5ba8..db5b15654f96712abc842ca0c99654c1b7378808 100644 --- a/python/paddle/incubate/hapi/datasets/movie_reviews.py +++ b/python/paddle/text/datasets/movie_reviews.py @@ -54,28 +54,28 @@ class MovieReviews(Dataset): .. 
code-block:: python - import paddle - from paddle.incubate.hapi.datasets import MovieReviews + import paddle + from paddle.text.datasets import MovieReviews - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super(SimpleNet, self).__init__() + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() - def forward(self, word, category): - return paddle.sum(word), category + def forward(self, word, category): + return paddle.sum(word), category - paddle.disable_static() + paddle.disable_static() - movie_reviews = MovieReviews(mode='train') + movie_reviews = MovieReviews(mode='train') - for i in range(10): - word_list, category = movie_reviews[i] - word_list = paddle.to_tensor(word_list) - category = paddle.to_tensor(category) + for i in range(10): + word_list, category = movie_reviews[i] + word_list = paddle.to_tensor(word_list) + category = paddle.to_tensor(category) - model = SimpleNet() - word_list, category = model(word_list, category) - print(word_list.numpy().shape, category.numpy()) + model = SimpleNet() + word_list, category = model(word_list, category) + print(word_list.numpy().shape, category.numpy()) """ @@ -91,10 +91,10 @@ class MovieReviews(Dataset): def _get_word_dict(self): """ - Sorted the words by the frequency of words which occur in sample - :return: - words_freq_sorted - """ + Sorted the words by the frequency of words which occur in sample + :return: + words_freq_sorted + """ words_freq_sorted = list() word_freq_dict = collections.defaultdict(int) @@ -110,10 +110,10 @@ class MovieReviews(Dataset): def _sort_files(self): """ - Sorted the sample for cross reading the sample - :return: - files_list - """ + Sorted the sample for cross reading the sample + :return: + files_list + """ files_list = list() neg_file_list = movie_reviews.fileids('neg') pos_file_list = movie_reviews.fileids('pos') @@ -123,10 +123,10 @@ class MovieReviews(Dataset): def _load_sentiment_data(self): """ - Load the data set - :return: - data_set - """ + Load the data set + :return: + data_set + """ self.data = [] words_ids = dict(self._get_word_dict()) for sample_file in self._sort_files(): @@ -138,8 +138,8 @@ class MovieReviews(Dataset): def _download_data_if_not_yet(self): """ - Download the data set, if the data set is not download. - """ + Download the data set, if the data set is not download. + """ try: # download and extract movie_reviews.zip paddle.dataset.common.download( diff --git a/python/paddle/incubate/hapi/datasets/movielens.py b/python/paddle/text/datasets/movielens.py similarity index 87% rename from python/paddle/incubate/hapi/datasets/movielens.py rename to python/paddle/text/datasets/movielens.py index 228e9dc6d477cf539683963dc6ddaa3c02c8fe95..75b59cfbb0d8177e0ced784904962ef777b289cb 100644 --- a/python/paddle/incubate/hapi/datasets/movielens.py +++ b/python/paddle/text/datasets/movielens.py @@ -24,7 +24,7 @@ import six import paddle from paddle.io import Dataset import paddle.compat as cpt -from .utils import _check_exists_and_download +from paddle.dataset.common import _check_exists_and_download __all__ = ['Movielens'] @@ -106,29 +106,29 @@ class Movielens(Dataset): .. 
code-block:: python - import paddle - from paddle.incubate.hapi.datasets import Movielens + import paddle + from paddle.text.datasets import Movielens - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super(SimpleNet, self).__init__() + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() - def forward(self, category, title, rating): - return paddle.sum(category), paddle.sum(title), paddle.sum(rating) + def forward(self, category, title, rating): + return paddle.sum(category), paddle.sum(title), paddle.sum(rating) - paddle.disable_static() + paddle.disable_static() - movielens = Movielens(mode='train') + movielens = Movielens(mode='train') - for i in range(10): - category, title, rating = movielens[i][-3:] - category = paddle.to_tensor(category) - title = paddle.to_tensor(title) - rating = paddle.to_tensor(rating) + for i in range(10): + category, title, rating = movielens[i][-3:] + category = paddle.to_tensor(category) + title = paddle.to_tensor(title) + rating = paddle.to_tensor(rating) - model = SimpleNet() - category, title, rating = model(category, title, rating) - print(category.numpy().shape, title.numpy().shape, rating.numpy().shape) + model = SimpleNet() + category, title, rating = model(category, title, rating) + print(category.numpy().shape, title.numpy().shape, rating.numpy().shape) """ diff --git a/python/paddle/incubate/hapi/datasets/uci_housing.py b/python/paddle/text/datasets/uci_housing.py similarity index 78% rename from python/paddle/incubate/hapi/datasets/uci_housing.py rename to python/paddle/text/datasets/uci_housing.py index c1f2c4a5bb5d9d60ba1316e3e2a5f174df94fe99..a0d465eb1775431ffa0527dfae8031bebd6fc340 100644 --- a/python/paddle/incubate/hapi/datasets/uci_housing.py +++ b/python/paddle/text/datasets/uci_housing.py @@ -17,9 +17,8 @@ from __future__ import print_function import six import numpy as np -import paddle.dataset.common from paddle.io import Dataset -from .utils import _check_exists_and_download +from paddle.dataset.common import _check_exists_and_download __all__ = ["UCIHousing"] @@ -50,28 +49,28 @@ class UCIHousing(Dataset): .. 
code-block:: python - import paddle - from paddle.incubate.hapi.datasets import UCIHousing + import paddle + from paddle.text.datasets import UCIHousing - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super(SimpleNet, self).__init__() + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() - def forward(self, feature, target): - return paddle.sum(feature), target + def forward(self, feature, target): + return paddle.sum(feature), target - paddle.disable_static() + paddle.disable_static() - uci_housing = UCIHousing(mode='train') + uci_housing = UCIHousing(mode='train') - for i in range(10): - feature, target = uci_housing[i] - feature = paddle.to_tensor(feature) - target = paddle.to_tensor(target) + for i in range(10): + feature, target = uci_housing[i] + feature = paddle.to_tensor(feature) + target = paddle.to_tensor(target) - model = SimpleNet() - feature, target = model(feature, target) - print(feature.numpy().shape, target.numpy()) + model = SimpleNet() + feature, target = model(feature, target) + print(feature.numpy().shape, target.numpy()) """ diff --git a/python/paddle/incubate/hapi/datasets/wmt14.py b/python/paddle/text/datasets/wmt14.py similarity index 78% rename from python/paddle/incubate/hapi/datasets/wmt14.py rename to python/paddle/text/datasets/wmt14.py index b495ea931a80425b8e24b81cdf8fdfd2c0920a3e..36cb6dfd3e5b7652da3e4e9233dd5b16076a53b6 100644 --- a/python/paddle/incubate/hapi/datasets/wmt14.py +++ b/python/paddle/text/datasets/wmt14.py @@ -20,7 +20,7 @@ import gzip from paddle.io import Dataset import paddle.compat as cpt -from .utils import _check_exists_and_download +from paddle.dataset.common import _check_exists_and_download __all__ = ['WMT14'] @@ -60,29 +60,29 @@ class WMT14(Dataset): .. code-block:: python - import paddle - from paddle.incubate.hapi.datasets import WMT14 + import paddle + from paddle.text.datasets import WMT14 - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super(SimpleNet, self).__init__() + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() - def forward(self, src_ids, trg_ids, trg_ids_next): - return paddle.sum(src_ids), paddle.sum(trg_ids), paddle.sum(trg_ids_next) + def forward(self, src_ids, trg_ids, trg_ids_next): + return paddle.sum(src_ids), paddle.sum(trg_ids), paddle.sum(trg_ids_next) - paddle.disable_static() + paddle.disable_static() - wmt14 = WMT14(mode='train', dict_size=50) + wmt14 = WMT14(mode='train', dict_size=50) - for i in range(10): - src_ids, trg_ids, trg_ids_next = wmt14[i] - src_ids = paddle.to_tensor(src_ids) - trg_ids = paddle.to_tensor(trg_ids) - trg_ids_next = paddle.to_tensor(trg_ids_next) + for i in range(10): + src_ids, trg_ids, trg_ids_next = wmt14[i] + src_ids = paddle.to_tensor(src_ids) + trg_ids = paddle.to_tensor(trg_ids) + trg_ids_next = paddle.to_tensor(trg_ids_next) - model = SimpleNet() - src_ids, trg_ids, trg_ids_next = model(src_ids, trg_ids, trg_ids_next) - print(src_ids.numpy(), trg_ids.numpy(), trg_ids_next.numpy()) + model = SimpleNet() + src_ids, trg_ids, trg_ids_next = model(src_ids, trg_ids, trg_ids_next) + print(src_ids.numpy(), trg_ids.numpy(), trg_ids_next.numpy()) """ @@ -173,6 +173,25 @@ class WMT14(Dataset): return len(self.src_ids) def get_dict(self, reverse=False): + """ + Get the source and target dictionary. + + Args: + reverse (bool): whether to reverse key and value in the dictionary, + i.e. key: value to value: key. + + Returns: + Two dictionaries, the source and target dictionary.
+ + Examples: + + .. code-block:: python + + from paddle.text.datasets import WMT14 + wmt14 = WMT14(mode='train', dict_size=50) + src_dict, trg_dict = wmt14.get_dict() + """ + src_dict, trg_dict = self.src_dict, self.trg_dict if reverse: src_dict = {v: k for k, v in six.iteritems(src_dict)} trg_dict = {v: k for k, v in six.iteritems(trg_dict)} diff --git a/python/paddle/incubate/hapi/datasets/wmt16.py b/python/paddle/text/datasets/wmt16.py similarity index 79% rename from python/paddle/incubate/hapi/datasets/wmt16.py rename to python/paddle/text/datasets/wmt16.py index 6d3cb8bfacadd15f6c0f973a09dbf544bbc396c0..03a62e9347035101f77cec971c32164b97dd844f 100644 --- a/python/paddle/incubate/hapi/datasets/wmt16.py +++ b/python/paddle/text/datasets/wmt16.py @@ -25,7 +25,7 @@ from collections import defaultdict import paddle from paddle.io import Dataset import paddle.compat as cpt -from .utils import _check_exists_and_download +from paddle.dataset.common import _check_exists_and_download __all__ = ['WMT16'] @@ -77,29 +77,29 @@ class WMT16(Dataset): .. code-block:: python - import paddle - from paddle.incubate.hapi.datasets import WMT16 + import paddle + from paddle.text.datasets import WMT16 - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super(SimpleNet, self).__init__() + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() - def forward(self, src_ids, trg_ids, trg_ids_next): - return paddle.sum(src_ids), paddle.sum(trg_ids), paddle.sum(trg_ids_next) + def forward(self, src_ids, trg_ids, trg_ids_next): + return paddle.sum(src_ids), paddle.sum(trg_ids), paddle.sum(trg_ids_next) - paddle.disable_static() + paddle.disable_static() - wmt16 = WMT16(mode='train', src_dict_size=50, trg_dict_size=50) + wmt16 = WMT16(mode='train', src_dict_size=50, trg_dict_size=50) - for i in range(10): - src_ids, trg_ids, trg_ids_next = wmt16[i] - src_ids = paddle.to_tensor(src_ids) - trg_ids = paddle.to_tensor(trg_ids) - trg_ids_next = paddle.to_tensor(trg_ids_next) + for i in range(10): + src_ids, trg_ids, trg_ids_next = wmt16[i] + src_ids = paddle.to_tensor(src_ids) + trg_ids = paddle.to_tensor(trg_ids) + trg_ids_next = paddle.to_tensor(trg_ids_next) - model = SimpleNet() - src_ids, trg_ids, trg_ids_next = model(src_ids, trg_ids, trg_ids_next) - print(src_ids.numpy(), trg_ids.numpy(), trg_ids_next.numpy()) + model = SimpleNet() + src_ids, trg_ids, trg_ids_next = model(src_ids, trg_ids, trg_ids_next) + print(src_ids.numpy(), trg_ids.numpy(), trg_ids_next.numpy()) """ @@ -222,21 +222,29 @@ class WMT16(Dataset): def get_dict(self, lang, reverse=False): """ - return the word dictionary for the specified language. - - Args: - lang(string): A string indicating which language is the source - language. Available options are: "en" for English - and "de" for Germany. - reverse(bool): If reverse is set to False, the returned python - dictionary will use word as key and use index as value. - If reverse is set to True, the returned python - dictionary will use index as key and word as value. - - Returns: - dict: The word dictionary for the specific language. - """ + return the word dictionary for the specified language. + + Args: + lang(string): A string indicating which language is the source + language. Available options are: "en" for English + and "de" for Germany. + reverse(bool): If reverse is set to False, the returned python + dictionary will use word as key and use index as value. 
+ If reverse is set to True, the returned python + dictionary will use index as key and word as value. + + Returns: + dict: The word dictionary for the specific language. + + Examples: + + .. code-block:: python + + from paddle.text.datasets import WMT16 + wmt16 = WMT16(mode='train', src_dict_size=50, trg_dict_size=50) + en_dict = wmt16.get_dict('en') + """ dict_size = self.src_dict_size if lang == self.lang else self.trg_dict_size dict_path = os.path.join(paddle.dataset.common.DATA_HOME, @@ -244,4 +252,4 @@ class WMT16(Dataset): assert os.path.exists(dict_path), "Word dictionary does not exist. " "Please invoke paddle.dataset.wmt16.train/test/validation first " "to build the dictionary." - return _load_dict(lang, dict_size) + return self._load_dict(lang, dict_size) diff --git a/python/paddle/incubate/hapi/text/text.py b/python/paddle/text/text.py similarity index 98% rename from python/paddle/incubate/hapi/text/text.py rename to python/paddle/text/text.py index a2940fbe6cf483bce905c596a4b50294129fab54..a0fa4791c5b1ca3dd5cfe85b03f6db9353803ba9 100644 --- a/python/paddle/incubate/hapi/text/text.py +++ b/python/paddle/text/text.py @@ -227,7 +227,7 @@ class BasicLSTMCell(RNNCell): import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import BasicLSTMCell, RNN + from paddle.text import BasicLSTMCell, RNN inputs = paddle.rand((2, 4, 32)) cell = BasicLSTMCell(input_size=32, hidden_size=64) @@ -358,7 +358,7 @@ class BasicGRUCell(RNNCell): import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import BasicGRUCell, RNN + from paddle.text import BasicGRUCell, RNN inputs = paddle.rand((2, 4, 32)) cell = BasicGRUCell(input_size=32, hidden_size=64) @@ -495,7 +495,7 @@ class RNN(Layer): import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import StackedLSTMCell, RNN + from paddle.text import StackedLSTMCell, RNN inputs = paddle.rand((2, 4, 32)) cell = StackedLSTMCell(input_size=32, hidden_size=64) @@ -648,7 +648,7 @@ class StackedRNNCell(RNNCell): .. code-block:: python - from paddle.incubate.hapi.text import BasicLSTMCell, StackedRNNCell + from paddle.text import BasicLSTMCell, StackedRNNCell cells = [BasicLSTMCell(32, 32), BasicLSTMCell(32, 32)] stack_rnn = StackedRNNCell(cells) @@ -789,7 +789,7 @@ class StackedLSTMCell(RNNCell): import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import StackedLSTMCell, RNN + from paddle.text import StackedLSTMCell, RNN inputs = paddle.rand((2, 4, 32)) cell = StackedLSTMCell(input_size=32, hidden_size=64) @@ -948,7 +948,7 @@ class LSTM(Layer): import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import LSTM + from paddle.text import LSTM inputs = paddle.rand((2, 4, 32)) lstm = LSTM(input_size=32, hidden_size=64, num_layers=2) @@ -1023,7 +1023,7 @@ class BidirectionalRNN(Layer): .. 
code-block:: python import paddle - from paddle.incubate.hapi.text import StackedLSTMCell, BidirectionalRNN + from paddle.text import StackedLSTMCell, BidirectionalRNN inputs = paddle.rand((2, 4, 32)) cell_fw = StackedLSTMCell(32, 64) @@ -1215,7 +1215,7 @@ class BidirectionalLSTM(Layer): import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import BidirectionalLSTM + from paddle.text import BidirectionalLSTM inputs = paddle.rand((2, 4, 32)) bi_lstm = BidirectionalLSTM(input_size=32, hidden_size=64, num_layers=2) @@ -1384,7 +1384,7 @@ class StackedGRUCell(RNNCell): import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import StackedGRUCell, RNN + from paddle.text import StackedGRUCell, RNN inputs = paddle.rand((2, 4, 32)) cell = StackedGRUCell(input_size=32, hidden_size=64) @@ -1524,7 +1524,7 @@ class GRU(Layer): import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import GRU + from paddle.text import GRU inputs = paddle.rand((2, 4, 32)) gru = GRU(input_size=32, hidden_size=64, num_layers=2) @@ -1644,7 +1644,7 @@ class BidirectionalGRU(Layer): import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import BidirectionalGRU + from paddle.text import BidirectionalGRU inputs = paddle.rand((2, 4, 32)) bi_gru = BidirectionalGRU(input_size=32, hidden_size=64, num_layers=2) @@ -1802,7 +1802,7 @@ class DynamicDecode(Layer): import paddle import paddle.fluid as fluid from paddle.fluid.layers import BeamSearchDecoder - from paddle.incubate.hapi.text import StackedLSTMCell, DynamicDecode + from paddle.text import StackedLSTMCell, DynamicDecode paddle.disable_static() @@ -2033,7 +2033,7 @@ class Conv1dPoolLayer(Layer): import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import Conv1dPoolLayer + from paddle.text import Conv1dPoolLayer # input: [batch_size, num_channels, sequence_length] input = paddle.rand((2, 32, 4)) @@ -2162,7 +2162,7 @@ class CNNEncoder(Layer): import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import CNNEncoder + from paddle.text import CNNEncoder # input: [batch_size, num_channels, sequence_length] input = paddle.rand((2, 32, 8)) @@ -2273,10 +2273,10 @@ class TransformerCell(RNNCell): import paddle import paddle.fluid as fluid from paddle.fluid.dygraph import Embedding, Linear - from paddle.incubate.hapi.text import TransformerDecoder - from paddle.incubate.hapi.text import TransformerCell - from paddle.incubate.hapi.text import TransformerBeamSearchDecoder - from paddle.incubate.hapi.text import DynamicDecode + from paddle.text import TransformerDecoder + from paddle.text import TransformerCell + from paddle.text import TransformerBeamSearchDecoder + from paddle.text import DynamicDecode paddle.disable_static() @@ -2440,10 +2440,10 @@ class TransformerBeamSearchDecoder(layers.BeamSearchDecoder): import paddle import paddle.fluid as fluid from paddle.fluid.dygraph import Embedding, Linear - from paddle.incubate.hapi.text import TransformerDecoder - from paddle.incubate.hapi.text import TransformerCell - from paddle.incubate.hapi.text import TransformerBeamSearchDecoder - from paddle.incubate.hapi.text import DynamicDecode + from paddle.text import TransformerDecoder + from paddle.text import TransformerCell + from paddle.text import TransformerBeamSearchDecoder + from paddle.text import DynamicDecode paddle.disable_static() @@ -2627,7 +2627,7 @@ class PrePostProcessLayer(Layer): import paddle import paddle.fluid as fluid - from 
paddle.incubate.hapi.text import PrePostProcessLayer + from paddle.text import PrePostProcessLayer # input: [batch_size, sequence_length, d_model] x = paddle.rand((2, 4, 32)) @@ -2709,7 +2709,7 @@ class MultiHeadAttention(Layer): import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import MultiHeadAttention + from paddle.text import MultiHeadAttention # encoder input: [batch_size, sequence_length, d_model] query = paddle.rand((2, 4, 128)) @@ -2917,7 +2917,7 @@ class FFN(Layer): import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import FFN + from paddle.text import FFN # input: [batch_size, sequence_length, d_model] x = paddle.rand((2, 4, 32)) @@ -2992,7 +2992,7 @@ class TransformerEncoderLayer(Layer): import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import TransformerEncoderLayer + from paddle.text import TransformerEncoderLayer # encoder input: [batch_size, src_len, d_model] enc_input = paddle.rand((2, 4, 128)) @@ -3095,7 +3095,7 @@ class TransformerEncoder(Layer): import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import TransformerEncoder + from paddle.text import TransformerEncoder # encoder input: [batch_size, src_len, d_model] enc_input = paddle.rand((2, 4, 128)) @@ -3206,7 +3206,7 @@ class TransformerDecoderLayer(Layer): import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import TransformerDecoderLayer + from paddle.text import TransformerDecoderLayer # decoder input: [batch_size, trg_len, d_model] dec_input = paddle.rand((2, 4, 128)) @@ -3348,7 +3348,7 @@ class TransformerDecoder(Layer): import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import TransformerDecoder + from paddle.text import TransformerDecoder # decoder input: [batch_size, trg_len, d_model] dec_input = paddle.rand((2, 4, 128)) @@ -3561,7 +3561,7 @@ class LinearChainCRF(Layer): import numpy as np import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import LinearChainCRF + from paddle.text import LinearChainCRF # emission: [batch_size, sequence_length, num_tags] emission = paddle.rand((2, 8, 5)) @@ -3689,7 +3689,7 @@ class CRFDecoding(Layer): import numpy as np import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import CRFDecoding + from paddle.text import CRFDecoding # emission: [batch_size, sequence_length, num_tags] emission = paddle.rand((2, 8, 5)) @@ -3858,7 +3858,7 @@ class SequenceTagging(Layer): import numpy as np import paddle import paddle.fluid as fluid - from paddle.incubate.hapi.text import SequenceTagging + from paddle.text import SequenceTagging # word: [batch_size, sequence_length] # dummy input just for example diff --git a/python/paddle/utils/__init__.py b/python/paddle/utils/__init__.py index 85d0e133fa406df414977c0d69e0537ab5833891..f6299980b3e5c0bd0c7551b6b51c9b067d7960b5 100644 --- a/python/paddle/utils/__init__.py +++ b/python/paddle/utils/__init__.py @@ -17,8 +17,9 @@ from .profiler import ProfilerOptions from .profiler import Profiler from .profiler import get_profiler from .deprecated import deprecated +from . 
import download -__all__ = ['dump_config', 'Ploter', 'deprecated'] +__all__ = ['dump_config', 'Ploter', 'deprecated', 'download'] #TODO: define new api under this directory # __all__ = ['unique_name', diff --git a/python/paddle/incubate/hapi/download.py b/python/paddle/utils/download.py similarity index 99% rename from python/paddle/incubate/hapi/download.py rename to python/paddle/utils/download.py index 9d935e48995742ca8dfadce79cb2ce7395051a29..d8c0a2fc8c28450108a01c57fb1d2c1f7303101c 100644 --- a/python/paddle/incubate/hapi/download.py +++ b/python/paddle/utils/download.py @@ -26,7 +26,6 @@ import tarfile import zipfile import time from collections import OrderedDict -from paddle.fluid.dygraph.parallel import ParallelEnv try: from tqdm import tqdm @@ -156,6 +155,9 @@ def get_path_from_url(url, root_dir, md5sum=None, check_exist=True): Returns: str: a local path to save downloaded models & weights & datasets. """ + + from paddle.fluid.dygraph.parallel import ParallelEnv + assert is_url(url), "downloading from {} not a url".format(url) # parse path after download to decompress under root_dir fullpath = _map_path(url, root_dir) diff --git a/python/paddle/utils/plotcurve.py b/python/paddle/utils/plotcurve.py deleted file mode 100644 index 9c298acf01db66459ca163bf1297f8c7d2be6cb0..0000000000000000000000000000000000000000 --- a/python/paddle/utils/plotcurve.py +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/python -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Plot training and testing curve from paddle log. - -It takes input from a file or stdin, and output to a file or stdout. - -Note: must have numpy and matplotlib installed in order to use this tool. - -usage: Plot training and testing curves from paddle log file. - [-h] [-i INPUT] [-o OUTPUT] [--format FORMAT] [key [key ...]] - -positional arguments: - key keys of scores to plot, the default will be AvgCost - -optional arguments: - -h, --help show this help message and exit - -i INPUT, --input INPUT - input filename of paddle log, default will be standard - input - -o OUTPUT, --output OUTPUT - output filename of figure, default will be standard - output - --format FORMAT figure format(png|pdf|ps|eps|svg) - - -The keys must be in the order of paddle output(!!!). - -For example, paddle.INFO contains the following log - I0406 21:26:21.325584 3832 Trainer.cpp:601] Pass=0 Batch=7771 AvgCost=0.624935 Eval: error=0.260972 - -To use this script to generate plot for AvgCost, error: - python plotcurve.py -i paddle.INFO -o figure.png AvgCost error -""" - -import six -import sys -import matplotlib -# the following line is added immediately after import matplotlib -# and before import pylot. The purpose is to ensure the plotting -# works even under remote login (i.e. 
headless display) -matplotlib.use('Agg') -from matplotlib import cm -import matplotlib.pyplot as pyplot -import numpy -import argparse -import re -import os - - -def plot_paddle_curve(keys, inputfile, outputfile, format='png', - show_fig=False): - """Plot curves from paddle log and save to outputfile. - - :param keys: a list of strings to be plotted, e.g. AvgCost - :param inputfile: a file object for input - :param outputfile: a file object for output - :return: None - """ - pass_pattern = r"Pass=([0-9]*)" - test_pattern = r"Test samples=([0-9]*)" - if not keys: - keys = ['AvgCost'] - for k in keys: - pass_pattern += r".*?%s=([0-9e\-\.]*)" % k - test_pattern += r".*?%s=([0-9e\-\.]*)" % k - data = [] - test_data = [] - compiled_pattern = re.compile(pass_pattern) - compiled_test_pattern = re.compile(test_pattern) - for line in inputfile: - found = compiled_pattern.search(line) - found_test = compiled_test_pattern.search(line) - if found: - data.append([float(x) for x in found.groups()]) - if found_test: - test_data.append([float(x) for x in found_test.groups()]) - x = numpy.array(data) - x_test = numpy.array(test_data) - if x.shape[0] <= 0: - sys.stderr.write("No data to plot. Exiting!\n") - return - m = len(keys) + 1 - for i in six.moves.xrange(1, m): - pyplot.plot( - x[:, 0], - x[:, i], - color=cm.jet(1.0 * (i - 1) / (2 * m)), - label=keys[i - 1]) - if (x_test.shape[0] > 0): - pyplot.plot( - x[:, 0], - x_test[:, i], - color=cm.jet(1.0 - 1.0 * (i - 1) / (2 * m)), - label="Test " + keys[i - 1]) - pyplot.xlabel('number of epoch') - pyplot.legend(loc='best') - if show_fig: - pyplot.show() - pyplot.savefig(outputfile, bbox_inches='tight') - pyplot.clf() - - -def main(argv): - """ - main method of plotting curves. - """ - cmdparser = argparse.ArgumentParser( - "Plot training and testing curves from paddle log file.") - cmdparser.add_argument( - 'key', nargs='*', help='keys of scores to plot, the default is AvgCost') - cmdparser.add_argument( - '-i', - '--input', - help='input filename of paddle log, ' - 'default will be standard input') - cmdparser.add_argument( - '-o', - '--output', - help='output filename of figure, ' - 'default will be standard output') - cmdparser.add_argument('--format', help='figure format(png|pdf|ps|eps|svg)') - args = cmdparser.parse_args(argv) - keys = args.key - if args.input: - inputfile = open(args.input) - else: - inputfile = sys.stdin - format = args.format - if args.output: - outputfile = open(args.output, 'wb') - if not format: - format = os.path.splitext(args.output)[1] - if not format: - format = 'png' - else: - outputfile = sys.stdout - plot_paddle_curve(keys, inputfile, outputfile, format) - inputfile.close() - outputfile.close() - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/python/paddle/utils/preprocess_img.py b/python/paddle/utils/preprocess_img.py deleted file mode 100644 index e54393fa4a029a510699e3e2bafef9f4d78c51e0..0000000000000000000000000000000000000000 --- a/python/paddle/utils/preprocess_img.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os -import random -import numpy as np -import PIL.Image as Image -from six.moves import cStringIO as StringIO -from . import preprocess_util -from .image_util import crop_img - - -def resize_image(img, target_size): - """ - Resize an image so that the shorter edge has length target_size. - img: the input image to be resized. - target_size: the target resized image size. - """ - percent = (target_size / float(min(img.size[0], img.size[1]))) - resized_size = int(round(img.size[0] * percent)),\ - int(round(img.size[1] * percent)) - img = img.resize(resized_size, Image.ANTIALIAS) - return img - - -class DiskImage: - """ - A class of image data on disk. - """ - - def __init__(self, path, target_size): - """ - path: path of the image. - target_size: target resize size. - """ - self.path = path - self.target_size = target_size - self.img = None - pass - - def read_image(self): - if self.img is None: - print("reading: " + self.path) - image = resize_image(Image.open(self.path), self.target_size) - self.img = image - - def convert_to_array(self): - self.read_image() - np_array = np.array(self.img) - if len(np_array.shape) == 3: - np_array = np.swapaxes(np_array, 1, 2) - np_array = np.swapaxes(np_array, 1, 0) - return np_array - - def convert_to_paddle_format(self): - """ - convert the image into the paddle batch format. - """ - self.read_image() - output = StringIO() - self.img.save(output, "jpeg") - contents = output.getvalue() - return contents - - -class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater): - """ - A class to process data for image classification. - """ - - def __init__(self, data_path, target_size, color=True): - """ - data_path: the path to store the training data and batches. - target_size: processed image size in a batch. - color: whether to use color images. - """ - preprocess_util.DatasetCreater.__init__(self, data_path) - self.target_size = target_size - self.color = color - self.keys = ["images", "labels"] - self.permute_key = "labels" - - def create_meta_file(self, data): - """ - Create a meta file for image classification. - The meta file contains the meam image, as well as some configs. - data: the training Dataaet. 
- """ - output_path = os.path.join(self.data_path, self.batch_dir_name, - self.meta_filename) - if self.color: - mean_img = np.zeros((3, self.target_size, self.target_size)) - else: - mean_img = np.zeros((self.target_size, self.target_size)) - for d in data.data: - img = d[0].convert_to_array() - cropped_img = crop_img(img, self.target_size, self.color) - mean_img += cropped_img - mean_img /= len(data.data) - mean_img = mean_img.astype('int32').flatten() - preprocess_util.save_file({ - "data_mean": mean_img, - "image_size": self.target_size, - "mean_image_size": self.target_size, - "num_classes": self.num_classes, - "color": self.color - }, output_path) - pass - - def create_dataset_from_list(self, path): - data = [] - label_set = [] - for line in open(path): - items = line.rstrip.split() - image_path = items[0] - label_name = items[1] - if not label_name in label_set: - label_set[label_name] = len(list(label_set.keys())) - img = DiskImage(path=image_path, target_size=self.target_size) - label = preprocess_util.Lablel( - label=label_set[label_name], name=label_name) - return preprocess_util.Dataset(data, self.keys), label_set - - def create_dataset_from_dir(self, path): - """ - Create a Dataset object for image classification. - Each folder in the path directory corresponds to a set of images of - this label, and the name of the folder is the name of the - path: the path of the image dataset. - """ - if self.from_list: - return self.create_dataset_from_list(path) - label_set = preprocess_util.get_label_set_from_dir(path) - data = [] - for l_name in list(label_set.keys()): - image_paths = preprocess_util.list_images( - os.path.join(path, l_name)) - for p in image_paths: - img = DiskImage(path=p, target_size=self.target_size) - label = preprocess_util.Label( - label=label_set[l_name], name=l_name) - data.append((img, label)) - random.shuffle(data) - return preprocess_util.Dataset(data, self.keys), label_set diff --git a/python/paddle/utils/preprocess_util.py b/python/paddle/utils/preprocess_util.py deleted file mode 100644 index 471cb07c84bc31a34d659e9ccc8bdd57442b8489..0000000000000000000000000000000000000000 --- a/python/paddle/utils/preprocess_util.py +++ /dev/null @@ -1,362 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import math -import six.moves.cPickle as pickle -import random -import collections - - -def save_file(data, filename): - """ - Save data into pickle format. - data: the data to save. - filename: the output filename. - """ - pickle.dump(data, open(filename, 'wb'), protocol=pickle.HIGHEST_PROTOCOL) - - -def save_list(l, outfile): - """ - Save a list of string into a text file. There is one line for each string. - l: the list of string to save - outfile: the output file - """ - open(outfile, "w").write("\n".join(l)) - - -def exclude_pattern(f): - """ - Return whether f is in the exclude pattern. - Exclude the files that starts with . or ends with ~. 
- """ - return f.startswith(".") or f.endswith("~") - - -def list_dirs(path): - """ - Return a list of directories in path. Exclude all the directories that - start with '.'. - path: the base directory to search over. - """ - return [ - os.path.join(path, d) for d in next(os.walk(path))[1] - if not exclude_pattern(d) - ] - - -def list_images(path, exts=set(["jpg", "png", "bmp", "jpeg"])): - """ - Return a list of images in path. - path: the base directory to search over. - exts: the extensions of the images to find. - """ - return [os.path.join(path, d) for d in os.listdir(path) \ - if os.path.isfile(os.path.join(path, d)) and not exclude_pattern(d)\ - and os.path.splitext(d)[-1][1:] in exts] - - -def list_files(path): - """ - Return a list of files in path. - path: the base directory to search over. - exts: the extensions of the images to find. - """ - return [os.path.join(path, d) for d in os.listdir(path) \ - if os.path.isfile(os.path.join(path, d)) and not exclude_pattern(d)] - - -def get_label_set_from_dir(path): - """ - Return a dictionary of the labels and label ids from a path. - Assume each directory in the path corresponds to a unique label. - The keys of the dictionary is the label name. - The values of the dictionary is the label id. - """ - dirs = list_dirs(path) - return dict([(os.path.basename(d), i) for i, d in enumerate(sorted(dirs))]) - - -class Label: - """ - A class of label data. - """ - - def __init__(self, label, name): - """ - label: the id of the label. - name: the name of the label. - """ - self.label = label - self.name = name - - def convert_to_paddle_format(self): - """ - convert the image into the paddle batch format. - """ - return int(self.label) - - def __hash__(self): - return hash((self.label)) - - -class Dataset: - """ - A class to represent a dataset. A dataset contains a set of items. - Each item contains multiple slots of data. - For example: in image classification dataset, each item contains two slot, - The first slot is an image, and the second slot is a label. - """ - - def __init__(self, data, keys): - """ - data: a list of data. - Each data is a tuple containing multiple slots of data. - Each slot is an object with convert_to_paddle_format function. - keys: contains a list of keys for all the slots. - """ - self.data = data - self.keys = keys - - def check_valid(self): - for d in self.data: - assert (len(d) == len(self.keys)) - - def permute(self, key_id, num_per_batch): - """ - Permuate data for batching. It supports two types now: - 1. if key_id == None, the batching process is completely random. - 2. if key_id is not None. The batching process Permuate the data so that the key specified by key_id are - uniformly distributed in batches. See the comments of permute_by_key for details. - """ - if key_id is None: - self.uniform_permute() - else: - self.permute_by_key(key_id, num_per_batch) - - def uniform_permute(self): - """ - Permuate the data randomly. - """ - random.shuffle(self.data) - - def permute_by_key(self, key_id, num_per_batch): - """ - Permuate the data so that the key specified by key_id are - uniformly distributed in batches. - For example: if we have three labels, and the number of data - for each label are 100, 200, and 300, respectively. The number of batches is 4. - Then, the number of data for these labels is 25, 50, and 75. - """ - # Store the indices of the data that has the key value - # specified by key_id. 
- keyvalue_indices = collections.defaultdict(list) - for idx in range(len(self.data)): - keyvalue_indices[self.data[idx][key_id].label].append(idx) - for k in keyvalue_indices: - random.shuffle(keyvalue_indices[k]) - - num_data_per_key_batch = \ - math.ceil(num_per_batch / float(len(list(keyvalue_indices.keys())))) - - if num_data_per_key_batch < 2: - raise Exception("The number of data in a batch is too small") - - permuted_data = [] - keyvalue_readpointer = collections.defaultdict(int) - while len(permuted_data) < len(self.data): - for k in keyvalue_indices: - begin_idx = keyvalue_readpointer[k] - end_idx = int( - min(begin_idx + num_data_per_key_batch, - len(keyvalue_indices[k]))) - print("begin_idx, end_idx") - print(begin_idx, end_idx) - for idx in range(begin_idx, end_idx): - permuted_data.append(self.data[keyvalue_indices[k][idx]]) - keyvalue_readpointer[k] = end_idx - self.data = permuted_data - - -class DataBatcher: - """ - A class that is used to create batches for both training and testing - datasets. - """ - - def __init__(self, train_data, test_data, label_set): - """ - train_data, test_data: Each one is a dataset object representing - training and testing data, respectively. - label_set: a dictionary storing the mapping from label name to label id. - """ - self.train_data = train_data - self.test_data = test_data - self.label_set = label_set - self.num_per_batch = 5000 - assert (self.train_data.keys == self.test_data.keys) - - def create_batches_and_list(self, output_path, train_list_name, - test_list_name, label_set_name): - """ - Create batches for both training and testing objects. - It also create train.list and test.list to indicate the list - of the batch files for training and testing data, respectively. - """ - train_list = self.create_batches(self.train_data, output_path, "train_", - self.num_per_batch) - test_list = self.create_batches(self.test_data, output_path, "test_", - self.num_per_batch) - save_list(train_list, os.path.join(output_path, train_list_name)) - save_list(test_list, os.path.join(output_path, test_list_name)) - save_file(self.label_set, os.path.join(output_path, label_set_name)) - - def create_batches(self, - data, - output_path, - prefix="", - num_data_per_batch=5000): - """ - Create batches for a Dataset object. - data: the Dataset object to process. - output_path: the output path of the batches. - prefix: the prefix of each batch. - num_data_per_batch: number of data in each batch. - """ - num_batches = int(math.ceil(len(data.data) / float(num_data_per_batch))) - batch_names = [] - data.check_valid() - num_slots = len(data.keys) - for i in range(num_batches): - batch_name = os.path.join(output_path, prefix + "batch_%03d" % i) - out_data = dict([(k, []) for k in data.keys]) - begin_idx = i * num_data_per_batch - end_idx = min((i + 1) * num_data_per_batch, len(data.data)) - for j in range(begin_idx, end_idx): - for slot_id in range(num_slots): - out_data[data.keys[slot_id]].\ - append(data.data[j][slot_id].convert_to_paddle_format()) - save_file(out_data, batch_name) - batch_names.append(batch_name) - return batch_names - - -class DatasetCreater(object): - """ - A virtual class for creating datasets. - The derived class needs to implement the following methods: - - create_dataset() - - create_meta_file() - """ - - def __init__(self, data_path): - """ - data_path: the path to store the training data and batches. - train_dir_name: relative training data directory. - test_dir_name: relative testing data directory. 
- batch_dir_name: relative batch directory. - num_per_batch: the number of data in a batch. - meta_filename: the filename of the meta file. - train_list_name: training batch list name. - test_list_name: testing batch list name. - label_set: label set name. - overwrite: whether to overwrite the files if the batches are already in - the given path. - """ - self.data_path = data_path - self.train_dir_name = 'train' - self.test_dir_name = 'test' - self.batch_dir_name = 'batches' - self.num_per_batch = 50000 - self.meta_filename = "batches.meta" - self.train_list_name = "train.list" - self.test_list_name = "test.list" - self.label_set_name = "labels.pkl" - self.output_path = os.path.join(self.data_path, self.batch_dir_name) - self.overwrite = False - self.permutate_key = "labels" - self.from_list = False - - def create_meta_file(self, data): - """ - Create a meta file from training data. - data: training data given in a Dataset format. - """ - raise NotImplementedError - - def create_dataset(self, path): - """ - Create a data set object from a path. - It will use directory structure or a file list to determine dataset if - self.from_list is True. Otherwise, it will uses a file list to - determine the dataset. - path: the path of the dataset. - return a tuple of Dataset object, and a mapping from label set - to label id. - """ - if self.from_list: - return self.create_dataset_from_list(path) - else: - return self.create_dataset_from_dir(path) - - def create_dataset_from_list(self, path): - """ - Create a data set object from a path. - It will uses a file list to determine the dataset. - path: the path of the dataset. - return a tuple of Dataset object, and a mapping from label set - to label id - """ - raise NotImplementedError - - def create_dataset_from_dir(self, path): - """ - Create a data set object from a path. - It will use directory structure or a file list to determine dataset if - self.from_list is True. - path: the path of the dataset. - return a tuple of Dataset object, and a mapping from label set - to label id - """ - raise NotImplementedError - - def create_batches(self): - """ - create batches and meta file. - """ - train_path = os.path.join(self.data_path, self.train_dir_name) - test_path = os.path.join(self.data_path, self.test_dir_name) - out_path = os.path.join(self.data_path, self.batch_dir_name) - if not os.path.exists(out_path): - os.makedirs(out_path) - if (self.overwrite or not os.path.exists( - os.path.join(out_path, self.train_list_name))): - train_data, train_label_set = \ - self.create_dataset(train_path) - test_data, test_label_set = \ - self.create_dataset(test_path) - - train_data.permute( - self.keys.index(self.permutate_key), self.num_per_batch) - - assert (train_label_set == test_label_set) - data_batcher = DataBatcher(train_data, test_data, train_label_set) - data_batcher.num_per_batch = self.num_per_batch - data_batcher.create_batches_and_list( - self.output_path, self.train_list_name, self.test_list_name, - self.label_set_name) - self.num_classes = len(list(train_label_set.keys())) - self.create_meta_file(train_data) - return out_path diff --git a/python/paddle/utils/show_pb.py b/python/paddle/utils/show_pb.py deleted file mode 100644 index da7a71a665aea4d93d366e8508f438a9aba88e94..0000000000000000000000000000000000000000 --- a/python/paddle/utils/show_pb.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Show the content of proto buffer data file of PADDLE -""" - -from __future__ import print_function - -import os -import sys -from google.protobuf.internal.decoder import _DecodeVarint -import paddle.proto.DataFormat_pb2 as DataFormat - - -def read_proto(file, message): - """ - read a protobuffer struct from file, the length of the struct is stored as - a varint, then followed by the actual struct data. - @return True success, False for end of file - """ - - buf = file.read(8) - if not buf: - return False - result, pos = _DecodeVarint(buf, 0) - buf = buf[pos:] + file.read(result - len(buf) + pos) - message.ParseFromString(buf) - - return True - - -def usage(): - print("Usage: python show_pb.py PROTO_DATA_FILE", file=sys.stderr) - exit(1) - - -if __name__ == '__main__': - if len(sys.argv) < 2: - usage() - - f = open(sys.argv[1]) - header = DataFormat.DataHeader() - read_proto(f, header) - print(header) - - sample = DataFormat.DataSample() - while read_proto(f, sample): - print(sample) diff --git a/python/paddle/utils/torch2paddle.py b/python/paddle/utils/torch2paddle.py deleted file mode 100644 index 398d3aa4e02cc74b7885f7e676937d7fd254bc5e..0000000000000000000000000000000000000000 --- a/python/paddle/utils/torch2paddle.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Convert torch parameter file to paddle model files. - -Note: must have torchfile installed in order to use this tool. - -Usage: python torch2paddle.py -i torchfile.t7 -l layers.txt -o path/to/paddle_model -""" - -import os -import sys -import struct -import numpy as np -import torchfile -import six.moves.cPickle as pickle -import argparse - - -# save parameters -def save_layer_parameters(outfile, feats): - version = 0 - value_size = 4 - ret = "" - for feat in feats: - ret += feat.tostring() - size = len(ret) / 4 - fo = open(outfile, 'wb') - fo.write(struct.pack('iIQ', version, value_size, size)) - fo.write(ret) - fo.close() - - -def save_net_parameters(layers, params, output_path): - for i in range(len(layers)): - weight = params[i * 2] - biases = params[i * 2 + 1] - weight_file = os.path.join(output_path, '_%s.w0' % layers[i]) - biases_file = os.path.join(output_path, '_%s.wbias' % layers[i]) - print("Saving for layer %s." 
% layers[i]) - save_layer_parameters(weight_file, [weight]) - save_layer_parameters(biases_file, biases) - - -def load_layer_parameters(filename): - fn = open(filename, 'rb') - version, = struct.unpack('i', fn.read(4)) - value_length, = struct.unpack("I", fn.read(4)) - dtype = 'float32' if value_length == 4 else 'float64' - param_size, = struct.unpack("L", fn.read(8)) - value = np.fromfile(fn, dtype) - return value - - -def main(argv): - """ - main method of converting torch to paddle files. - :param argv: - :return: - """ - cmdparser = argparse.ArgumentParser( - "Convert torch parameter file to paddle model files.") - cmdparser.add_argument( - '-i', '--input', help='input filename of torch parameters') - cmdparser.add_argument('-l', '--layers', help='list of layer names') - cmdparser.add_argument( - '-o', '--output', help='output file path of paddle model') - - args = cmdparser.parse_args(argv) - if args.input and args.layers and args.output: - params = torchfile.load(args.input) - layers = [line.strip() for line in open(args.layers, 'r')] - save_net_parameters(layers, params, args.output) - else: - print( - 'Usage: python torch2paddle.py -i torchfile.t7 -l layers.txt -o path/to/paddle_model' - ) - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/python/paddle/incubate/hapi/vision/__init__.py b/python/paddle/vision/__init__.py similarity index 87% rename from python/paddle/incubate/hapi/vision/__init__.py rename to python/paddle/vision/__init__.py index c9d65db18653bf842f5d95ccf472686d5b08c84d..7d28d567cefa2f0cf3ab4f7077d71ea27edc936a 100644 --- a/python/paddle/incubate/hapi/vision/__init__.py +++ b/python/paddle/vision/__init__.py @@ -13,9 +13,14 @@ # limitations under the License. from . import models -from . import transforms from .models import * + +from . import transforms from .transforms import * +from . import datasets +from .datasets import * + __all__ = models.__all__ \ - + transforms.__all__ + + transforms.__all__ \ + + datasets.__all__ diff --git a/python/paddle/incubate/hapi/datasets/utils.py b/python/paddle/vision/datasets/__init__.py similarity index 60% rename from python/paddle/incubate/hapi/datasets/utils.py rename to python/paddle/vision/datasets/__init__.py index 171f794ba9df4270727a23cc6cd039a9faa81970..6703aa4197603be2d82d930e3cd2622ff6b4cd77 100644 --- a/python/paddle/incubate/hapi/datasets/utils.py +++ b/python/paddle/vision/datasets/__init__.py @@ -12,18 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function +from . import folder +from . import mnist +from . import flowers +from . import cifar +from . 
import voc2012 -import os -import paddle.dataset.common +from .folder import * +from .mnist import * +from .flowers import * +from .cifar import * +from .voc2012 import * - -def _check_exists_and_download(path, url, md5, module_name, download=True): - if path and os.path.exists(path): - return path - - if download: - return paddle.dataset.common.download(url, module_name, md5) - else: - raise ValueError('{} not exists and auto download disabled'.format( - path)) +__all__ = folder.__all__ \ + + mnist.__all__ \ + + flowers.__all__ \ + + cifar.__all__ \ + + voc2012.__all__ diff --git a/python/paddle/incubate/hapi/datasets/cifar.py b/python/paddle/vision/datasets/cifar.py similarity index 67% rename from python/paddle/incubate/hapi/datasets/cifar.py rename to python/paddle/vision/datasets/cifar.py index adfa786e615368ba90dab154924678de79104b55..1193be26da56780058beadfe15640bc76533114a 100644 --- a/python/paddle/incubate/hapi/datasets/cifar.py +++ b/python/paddle/vision/datasets/cifar.py @@ -20,7 +20,7 @@ import six from six.moves import cPickle as pickle from paddle.io import Dataset -from .utils import _check_exists_and_download +from paddle.dataset.common import _check_exists_and_download __all__ = ['Cifar10', 'Cifar100'] @@ -58,33 +58,36 @@ class Cifar10(Dataset): .. code-block:: python - import paddle - from paddle.incubate.hapi.datasets import Cifar10 - from paddle.incubate.hapi.vision.transforms import Normalize + import paddle + import paddle.nn as nn + from paddle.vision.datasets import Cifar10 + from paddle.vision.transforms import Normalize - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super(SimpleNet, self).__init__() - self.fc = paddle.nn.Linear(3072, 10, act='softmax') + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() + self.fc = nn.Sequential( + nn.Linear(3072, 10), + nn.Softmax()) - def forward(self, image, label): - image = paddle.reshape(image, (3, -1)) - return self.fc(image), label + def forward(self, image, label): + image = paddle.reshape(image, (1, -1)) + return self.fc(image), label - paddle.disable_static() + paddle.disable_static() - normalize = Normalize(mean=[0.5, 0.5, 0.5], - std=[0.5, 0.5, 0.5]) - cifar10 = Cifar10(mode='train', transform=normalize) + normalize = Normalize(mean=[0.5, 0.5, 0.5], + std=[0.5, 0.5, 0.5]) + cifar10 = Cifar10(mode='train', transform=normalize) - for i in range(10): - image, label = cifar10[i] - image = paddle.to_tensor(image) - label = paddle.to_tensor(label) + for i in range(10): + image, label = cifar10[i] + image = paddle.to_tensor(image) + label = paddle.to_tensor(label) - model = SimpleNet() - image, label = model(image, label) - print(image.numpy().shape, label.numpy().shape) + model = SimpleNet() + image, label = model(image, label) + print(image.numpy().shape, label.numpy().shape) """ @@ -164,33 +167,36 @@ class Cifar100(Cifar10): ..
code-block:: python - import paddle - from paddle.incubate.hapi.datasets import Cifar100 - from paddle.incubate.hapi.vision.transforms import Normalize + import paddle + import paddle.nn as nn + from paddle.vision.datasets import Cifar100 + from paddle.vision.transforms import Normalize - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super(SimpleNet, self).__init__() - self.fc = paddle.nn.Linear(3072, 100, act='softmax') + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() + self.fc = nn.Sequential( + nn.Linear(3072, 100), + nn.Softmax()) - def forward(self, image, label): - image = paddle.reshape(image, (3, -1)) - return self.fc(image), label + def forward(self, image, label): + image = paddle.reshape(image, (1, -1)) + return self.fc(image), label - paddle.disable_static() + paddle.disable_static() - normalize = Normalize(mean=[0.5, 0.5, 0.5], - std=[0.5, 0.5, 0.5]) - cifar100 = Cifar100(mode='train', transform=normalize) + normalize = Normalize(mean=[0.5, 0.5, 0.5], + std=[0.5, 0.5, 0.5]) + cifar100 = Cifar100(mode='train', transform=normalize) - for i in range(10): - image, label = cifar100[i] - image = paddle.to_tensor(image) - label = paddle.to_tensor(label) + for i in range(10): + image, label = cifar100[i] + image = paddle.to_tensor(image) + label = paddle.to_tensor(label) - model = SimpleNet() - image, label = model(image, label) - print(image.numpy().shape, label.numpy().shape) + model = SimpleNet() + image, label = model(image, label) + print(image.numpy().shape, label.numpy().shape) """ diff --git a/python/paddle/incubate/hapi/datasets/flowers.py b/python/paddle/vision/datasets/flowers.py similarity index 97% rename from python/paddle/incubate/hapi/datasets/flowers.py rename to python/paddle/vision/datasets/flowers.py index 141d2a53b577b8c9be9ac153a36c5b2fa51ded77..1c0f41123e2313d9db6f5e846d133ecdebc7f1af 100644 --- a/python/paddle/incubate/hapi/datasets/flowers.py +++ b/python/paddle/vision/datasets/flowers.py @@ -22,7 +22,7 @@ import scipy.io as scio from PIL import Image from paddle.io import Dataset -from .utils import _check_exists_and_download +from paddle.dataset.common import _check_exists_and_download __all__ = ["Flowers"] @@ -60,7 +60,7 @@ class Flowers(Dataset): ..
code-block:: python - from paddle.incubate.hapi.datasets import Flowers + from paddle.vision.datasets import Flowers flowers = Flowers(mode='test') diff --git a/python/paddle/incubate/hapi/datasets/folder.py b/python/paddle/vision/datasets/folder.py similarity index 98% rename from python/paddle/incubate/hapi/datasets/folder.py rename to python/paddle/vision/datasets/folder.py index 358e7681eb8e64364600732f0399e6b97f0d64e0..725fd9acafbab7b6adaf07139d02da8e2c9aaada 100644 --- a/python/paddle/incubate/hapi/datasets/folder.py +++ b/python/paddle/vision/datasets/folder.py @@ -94,7 +94,7 @@ class DatasetFolder(Dataset): import tempfile import shutil import numpy as np - from paddle.incubate.hapi.datasets import DatasetFolder + from paddle.vision.datasets import DatasetFolder def make_fake_dir(): data_dir = tempfile.mkdtemp() @@ -224,7 +224,7 @@ class ImageFolder(Dataset): import tempfile import shutil import numpy as np - from paddle.incubate.hapi.datasets import ImageFolder + from paddle.vision.datasets import ImageFolder def make_fake_dir(): data_dir = tempfile.mkdtemp() diff --git a/python/paddle/incubate/hapi/datasets/mnist.py b/python/paddle/vision/datasets/mnist.py similarity index 97% rename from python/paddle/incubate/hapi/datasets/mnist.py rename to python/paddle/vision/datasets/mnist.py index ed046e5a1d9bbcc33f3148c6ecde8a349e478cb0..a98561333921d182c0b3a3f486c90a94e79b6a3d 100644 --- a/python/paddle/incubate/hapi/datasets/mnist.py +++ b/python/paddle/vision/datasets/mnist.py @@ -19,9 +19,8 @@ import gzip import struct import numpy as np -import paddle.dataset.common from paddle.io import Dataset -from .utils import _check_exists_and_download +from paddle.dataset.common import _check_exists_and_download __all__ = ["MNIST"] @@ -58,7 +57,7 @@ class MNIST(Dataset): .. code-block:: python - from paddle.incubate.hapi.datasets import MNIST + from paddle.vision.datasets import MNIST mnist = MNIST(mode='test') diff --git a/python/paddle/incubate/hapi/datasets/voc2012.py b/python/paddle/vision/datasets/voc2012.py similarity index 82% rename from python/paddle/incubate/hapi/datasets/voc2012.py rename to python/paddle/vision/datasets/voc2012.py index 1811c455db530710a0559c077975ab08d6a94ac3..ae14ea3016363c828d17ba34aca8e1a6663ecf76 100644 --- a/python/paddle/incubate/hapi/datasets/voc2012.py +++ b/python/paddle/vision/datasets/voc2012.py @@ -20,7 +20,7 @@ import numpy as np from PIL import Image from paddle.io import Dataset -from .utils import _check_exists_and_download +from paddle.dataset.common import _check_exists_and_download __all__ = ["VOC2012"] @@ -52,28 +52,28 @@ class VOC2012(Dataset): .. 
code-block:: python - import paddle - from paddle.incubate.hapi.datasets import VOC2012 + import paddle + from paddle.vision.datasets import VOC2012 - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super(SimpleNet, self).__init__() + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() - def forward(self, image, label): - return paddle.sum(image), label + def forward(self, image, label): + return paddle.sum(image), label - paddle.disable_static() + paddle.disable_static() - voc2012 = VOC2012(mode='train') + voc2012 = VOC2012(mode='train') - for i in range(10): - image, label= voc2012[i] - image = paddle.cast(paddle.to_tensor(image), 'float32') - label = paddle.to_tensor(label) + for i in range(10): + image, label= voc2012[i] + image = paddle.cast(paddle.to_tensor(image), 'float32') + label = paddle.to_tensor(label) - model = SimpleNet() - image, label= model(image, label) - print(image.numpy().shape, label.numpy().shape) + model = SimpleNet() + image, label= model(image, label) + print(image.numpy().shape, label.numpy().shape) """ diff --git a/python/paddle/incubate/hapi/vision/models/__init__.py b/python/paddle/vision/models/__init__.py similarity index 100% rename from python/paddle/incubate/hapi/vision/models/__init__.py rename to python/paddle/vision/models/__init__.py diff --git a/python/paddle/incubate/hapi/vision/models/lenet.py b/python/paddle/vision/models/lenet.py similarity index 96% rename from python/paddle/incubate/hapi/vision/models/lenet.py rename to python/paddle/vision/models/lenet.py index 169f70562f6edfe1773a1c8d75004c25831cedcb..c2d4be7cda10d580af44154e6a03e0871ec20706 100644 --- a/python/paddle/incubate/hapi/vision/models/lenet.py +++ b/python/paddle/vision/models/lenet.py @@ -30,7 +30,7 @@ class LeNet(fluid.dygraph.Layer): Examples: .. code-block:: python - from paddle.incubate.hapi.vision.models import LeNet + from paddle.vision.models import LeNet model = LeNet() """ diff --git a/python/paddle/incubate/hapi/vision/models/mobilenetv1.py b/python/paddle/vision/models/mobilenetv1.py similarity index 98% rename from python/paddle/incubate/hapi/vision/models/mobilenetv1.py rename to python/paddle/vision/models/mobilenetv1.py index 5022a065a597553bc870b5da6cd5107e24b4ef0a..10defbf593dca642386e73b65094612f93dce9dc 100644 --- a/python/paddle/incubate/hapi/vision/models/mobilenetv1.py +++ b/python/paddle/vision/models/mobilenetv1.py @@ -17,7 +17,7 @@ from paddle.fluid.initializer import MSRA from paddle.fluid.param_attr import ParamAttr from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear -from ...download import get_weights_path_from_url +from paddle.utils.download import get_weights_path_from_url __all__ = ['MobileNetV1', 'mobilenet_v1'] @@ -116,7 +116,7 @@ class MobileNetV1(fluid.dygraph.Layer): Examples: .. code-block:: python - from paddle.incubate.hapi.vision.models import MobileNetV1 + from paddle.vision.models import MobileNetV1 model = MobileNetV1() """ @@ -291,7 +291,7 @@ def mobilenet_v1(pretrained=False, scale=1.0, **kwargs): Examples: .. 
code-block:: python - from paddle.incubate.hapi.vision.models import mobilenet_v1 + from paddle.vision.models import mobilenet_v1 # build model model = mobilenet_v1() diff --git a/python/paddle/incubate/hapi/vision/models/mobilenetv2.py b/python/paddle/vision/models/mobilenetv2.py similarity index 97% rename from python/paddle/incubate/hapi/vision/models/mobilenetv2.py rename to python/paddle/vision/models/mobilenetv2.py index d5cbfc7b96114dd9a3c122d646f47ca26afcb743..c08fb88f8bdb234fec99ed139aa7eb6249965c79 100644 --- a/python/paddle/incubate/hapi/vision/models/mobilenetv2.py +++ b/python/paddle/vision/models/mobilenetv2.py @@ -18,7 +18,7 @@ import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear -from ...download import get_weights_path_from_url +from paddle.utils.download import get_weights_path_from_url __all__ = ['MobileNetV2', 'mobilenet_v2'] @@ -163,7 +163,7 @@ class MobileNetV2(fluid.dygraph.Layer): Examples: .. code-block:: python - from paddle.incubate.hapi.vision.models import MobileNetV2 + from paddle.vision.models import MobileNetV2 model = MobileNetV2() """ @@ -267,7 +267,7 @@ def mobilenet_v2(pretrained=False, scale=1.0, **kwargs): Examples: .. code-block:: python - from paddle.incubate.hapi.vision.models import mobilenet_v2 + from paddle.vision.models import mobilenet_v2 # build model model = mobilenet_v2() diff --git a/python/paddle/incubate/hapi/vision/models/resnet.py b/python/paddle/vision/models/resnet.py similarity index 95% rename from python/paddle/incubate/hapi/vision/models/resnet.py rename to python/paddle/vision/models/resnet.py index 858934e1c179fa75b5d3510e0e9b6c53bca8e608..da0c3e9eb3f67f0aad67cdef3c5527cb2275e844 100644 --- a/python/paddle/incubate/hapi/vision/models/resnet.py +++ b/python/paddle/vision/models/resnet.py @@ -21,7 +21,7 @@ import paddle.fluid as fluid from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear from paddle.fluid.dygraph.container import Sequential -from ...download import get_weights_path_from_url +from paddle.utils.download import get_weights_path_from_url __all__ = [ 'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152' @@ -180,8 +180,8 @@ class ResNet(fluid.dygraph.Layer): Examples: .. code-block:: python - from paddle.incubate.hapi.vision.models import ResNet - from paddle.incubate.hapi.vision.models.resnet import BottleneckBlock, BasicBlock + from paddle.vision.models import ResNet + from paddle.vision.models.resnet import BottleneckBlock, BasicBlock resnet50 = ResNet(BottleneckBlock, 50) @@ -292,7 +292,7 @@ def resnet18(pretrained=False, **kwargs): Examples: .. code-block:: python - from paddle.incubate.hapi.vision.models import resnet18 + from paddle.vision.models import resnet18 # build model model = resnet18() @@ -312,7 +312,7 @@ def resnet34(pretrained=False, **kwargs): Examples: .. code-block:: python - from paddle.incubate.hapi.vision.models import resnet34 + from paddle.vision.models import resnet34 # build model model = resnet34() @@ -332,7 +332,7 @@ def resnet50(pretrained=False, **kwargs): Examples: .. code-block:: python - from paddle.incubate.hapi.vision.models import resnet50 + from paddle.vision.models import resnet50 # build model model = resnet50() @@ -352,7 +352,7 @@ def resnet101(pretrained=False, **kwargs): Examples: .. 
code-block:: python - from paddle.incubate.hapi.vision.models import resnet101 + from paddle.vision.models import resnet101 # build model model = resnet101() @@ -372,7 +372,7 @@ def resnet152(pretrained=False, **kwargs): Examples: .. code-block:: python - from paddle.incubate.hapi.vision.models import resnet152 + from paddle.vision.models import resnet152 # build model model = resnet152() diff --git a/python/paddle/incubate/hapi/vision/models/vgg.py b/python/paddle/vision/models/vgg.py similarity index 94% rename from python/paddle/incubate/hapi/vision/models/vgg.py rename to python/paddle/vision/models/vgg.py index 4352a768eb7206ca30acead580a64a7d04b7701b..8bfacda2476d0e24e549513b379181bf47e40d45 100644 --- a/python/paddle/incubate/hapi/vision/models/vgg.py +++ b/python/paddle/vision/models/vgg.py @@ -16,7 +16,7 @@ import paddle.fluid as fluid from paddle.nn import Conv2d, Pool2D, BatchNorm, Linear, ReLU, Softmax from paddle.fluid.dygraph.container import Sequential -from ...download import get_weights_path_from_url +from paddle.utils.download import get_weights_path_from_url __all__ = [ 'VGG', @@ -65,8 +65,8 @@ class VGG(fluid.dygraph.Layer): Examples: .. code-block:: python - from paddle.incubate.hapi.vision.models import VGG - from paddle.incubate.hapi.vision.models.vgg import make_layers + from paddle.vision.models import VGG + from paddle.vision.models.vgg import make_layers vgg11_cfg = [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'] @@ -160,7 +160,7 @@ def vgg11(pretrained=False, batch_norm=False, **kwargs): Examples: .. code-block:: python - from paddle.incubate.hapi.vision.models import vgg11 + from paddle.vision.models import vgg11 # build model model = vgg11() @@ -184,7 +184,7 @@ def vgg13(pretrained=False, batch_norm=False, **kwargs): Examples: .. code-block:: python - from paddle.incubate.hapi.vision.models import vgg13 + from paddle.vision.models import vgg13 # build model model = vgg13() @@ -208,7 +208,7 @@ def vgg16(pretrained=False, batch_norm=False, **kwargs): Examples: .. code-block:: python - from paddle.incubate.hapi.vision.models import vgg16 + from paddle.vision.models import vgg16 # build model model = vgg16() @@ -232,7 +232,7 @@ def vgg19(pretrained=False, batch_norm=False, **kwargs): Examples: .. code-block:: python - from paddle.incubate.hapi.vision.models import vgg19 + from paddle.vision.models import vgg19 # build model model = vgg19() diff --git a/python/paddle/incubate/hapi/vision/transforms/__init__.py b/python/paddle/vision/transforms/__init__.py similarity index 100% rename from python/paddle/incubate/hapi/vision/transforms/__init__.py rename to python/paddle/vision/transforms/__init__.py diff --git a/python/paddle/incubate/hapi/vision/transforms/functional.py b/python/paddle/vision/transforms/functional.py similarity index 96% rename from python/paddle/incubate/hapi/vision/transforms/functional.py rename to python/paddle/vision/transforms/functional.py index b118ee3fc7553dc7d02028ae273be33166829635..b5668fa8c7d6812664512a58faf836b5d9f09300 100644 --- a/python/paddle/incubate/hapi/vision/transforms/functional.py +++ b/python/paddle/vision/transforms/functional.py @@ -64,7 +64,7 @@ def flip(image, code): .. code-block:: python import numpy as np - from paddle.incubate.hapi.vision.transforms import functional as F + from paddle.vision.transforms import functional as F fake_img = np.random.rand(224, 224, 3) @@ -94,7 +94,7 @@ def resize(img, size, interpolation=cv2.INTER_LINEAR): .. 
code-block:: python import numpy as np - from paddle.incubate.hapi.vision.transforms import functional as F + from paddle.vision.transforms import functional as F fake_img = np.random.rand(256, 256, 3) @@ -155,7 +155,7 @@ def pad(img, padding, fill=(0, 0, 0), padding_mode='constant'): import numpy as np - from paddle.incubate.hapi.vision.transforms.functional import pad + from paddle.vision.transforms.functional import pad fake_img = np.random.rand(500, 500, 3).astype('float32') @@ -243,7 +243,7 @@ def rotate(img, import numpy as np - from paddle.incubate.hapi.vision.transforms.functional import rotate + from paddle.vision.transforms.functional import rotate fake_img = np.random.rand(500, 500, 3).astype('float32') @@ -305,7 +305,7 @@ def to_grayscale(img, num_output_channels=1): import numpy as np - from paddle.incubate.hapi.vision.transforms.functional import to_grayscale + from paddle.vision.transforms.functional import to_grayscale fake_img = np.random.rand(500, 500, 3).astype('float32') diff --git a/python/paddle/incubate/hapi/vision/transforms/transforms.py b/python/paddle/vision/transforms/transforms.py similarity index 94% rename from python/paddle/incubate/hapi/vision/transforms/transforms.py rename to python/paddle/vision/transforms/transforms.py index d46faa0685aa13790be217e0c99ab407790dd2ca..14809e0c1acaa1b6d5a494e6e3df1801e1c8f61b 100644 --- a/python/paddle/incubate/hapi/vision/transforms/transforms.py +++ b/python/paddle/vision/transforms/transforms.py @@ -76,8 +76,8 @@ class Compose(object): .. code-block:: python - from paddle.incubate.hapi.datasets import Flowers - from paddle.incubate.hapi.vision.transforms import Compose, ColorJitter, Resize + from paddle.vision.datasets import Flowers + from paddle.vision.transforms import Compose, ColorJitter, Resize transform = Compose([ColorJitter(), Resize(size=608)]) flowers = Flowers(mode='test', transform=transform) @@ -130,9 +130,9 @@ class BatchCompose(object): import numpy as np from paddle.io import DataLoader - from paddle.incubate.hapi import set_device - from paddle.incubate.hapi.datasets import Flowers - from paddle.incubate.hapi.vision.transforms import Compose, BatchCompose, Resize + from paddle import set_device + from paddle.vision.datasets import Flowers + from paddle.vision.transforms import Compose, BatchCompose, Resize class NormalizeBatch(object): def __init__(self, @@ -222,7 +222,7 @@ class Resize(object): import numpy as np - from paddle.incubate.hapi.vision.transforms import Resize + from paddle.vision.transforms import Resize transform = Resize(size=224) @@ -259,7 +259,7 @@ class RandomResizedCrop(object): import numpy as np - from paddle.incubate.hapi.vision.transforms import RandomResizedCrop + from paddle.vision.transforms import RandomResizedCrop transform = RandomResizedCrop(224) @@ -336,7 +336,7 @@ class CenterCropResize(object): import numpy as np - from paddle.incubate.hapi.vision.transforms import CenterCropResize + from paddle.vision.transforms import CenterCropResize transform = CenterCropResize(224) @@ -380,7 +380,7 @@ class CenterCrop(object): import numpy as np - from paddle.incubate.hapi.vision.transforms import CenterCrop + from paddle.vision.transforms import CenterCrop transform = CenterCrop(224) @@ -422,7 +422,7 @@ class RandomHorizontalFlip(object): import numpy as np - from paddle.incubate.hapi.vision.transforms import RandomHorizontalFlip + from paddle.vision.transforms import RandomHorizontalFlip transform = RandomHorizontalFlip(224) @@ -453,7 +453,7 @@ class RandomVerticalFlip(object): 
diff --git a/python/paddle/incubate/hapi/vision/transforms/transforms.py b/python/paddle/vision/transforms/transforms.py
similarity index 94%
rename from python/paddle/incubate/hapi/vision/transforms/transforms.py
rename to python/paddle/vision/transforms/transforms.py
index d46faa0685aa13790be217e0c99ab407790dd2ca..14809e0c1acaa1b6d5a494e6e3df1801e1c8f61b 100644
--- a/python/paddle/incubate/hapi/vision/transforms/transforms.py
+++ b/python/paddle/vision/transforms/transforms.py
@@ -76,8 +76,8 @@ class Compose(object):

        .. code-block:: python

-           from paddle.incubate.hapi.datasets import Flowers
-           from paddle.incubate.hapi.vision.transforms import Compose, ColorJitter, Resize
+           from paddle.vision.datasets import Flowers
+           from paddle.vision.transforms import Compose, ColorJitter, Resize

            transform = Compose([ColorJitter(), Resize(size=608)])
            flowers = Flowers(mode='test', transform=transform)
@@ -130,9 +130,9 @@ class BatchCompose(object):

            import numpy as np
            from paddle.io import DataLoader
-           from paddle.incubate.hapi import set_device
-           from paddle.incubate.hapi.datasets import Flowers
-           from paddle.incubate.hapi.vision.transforms import Compose, BatchCompose, Resize
+           from paddle import set_device
+           from paddle.vision.datasets import Flowers
+           from paddle.vision.transforms import Compose, BatchCompose, Resize

            class NormalizeBatch(object):
                def __init__(self,
@@ -222,7 +222,7 @@ class Resize(object):

            import numpy as np
-           from paddle.incubate.hapi.vision.transforms import Resize
+           from paddle.vision.transforms import Resize

            transform = Resize(size=224)
@@ -259,7 +259,7 @@ class RandomResizedCrop(object):

            import numpy as np
-           from paddle.incubate.hapi.vision.transforms import RandomResizedCrop
+           from paddle.vision.transforms import RandomResizedCrop

            transform = RandomResizedCrop(224)
@@ -336,7 +336,7 @@ class CenterCropResize(object):

            import numpy as np
-           from paddle.incubate.hapi.vision.transforms import CenterCropResize
+           from paddle.vision.transforms import CenterCropResize

            transform = CenterCropResize(224)
@@ -380,7 +380,7 @@ class CenterCrop(object):

            import numpy as np
-           from paddle.incubate.hapi.vision.transforms import CenterCrop
+           from paddle.vision.transforms import CenterCrop

            transform = CenterCrop(224)
@@ -422,7 +422,7 @@ class RandomHorizontalFlip(object):

            import numpy as np
-           from paddle.incubate.hapi.vision.transforms import RandomHorizontalFlip
+           from paddle.vision.transforms import RandomHorizontalFlip

            transform = RandomHorizontalFlip(224)
@@ -453,7 +453,7 @@ class RandomVerticalFlip(object):

            import numpy as np
-           from paddle.incubate.hapi.vision.transforms import RandomVerticalFlip
+           from paddle.vision.transforms import RandomVerticalFlip

            transform = RandomVerticalFlip(224)
@@ -488,7 +488,7 @@ class Normalize(object):

            import numpy as np
-           from paddle.incubate.hapi.vision.transforms import Normalize
+           from paddle.vision.transforms import Normalize

            normalize = Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
@@ -530,7 +530,7 @@ class Permute(object):

            import numpy as np
-           from paddle.incubate.hapi.vision.transforms import Permute
+           from paddle.vision.transforms import Permute

            transform = Permute()
@@ -569,7 +569,7 @@ class GaussianNoise(object):

            import numpy as np
-           from paddle.incubate.hapi.vision.transforms import GaussianNoise
+           from paddle.vision.transforms import GaussianNoise

            transform = GaussianNoise()
@@ -603,7 +603,7 @@ class BrightnessTransform(object):

            import numpy as np
-           from paddle.incubate.hapi.vision.transforms import BrightnessTransform
+           from paddle.vision.transforms import BrightnessTransform

            transform = BrightnessTransform(0.4)
@@ -642,7 +642,7 @@ class ContrastTransform(object):

            import numpy as np
-           from paddle.incubate.hapi.vision.transforms import ContrastTransform
+           from paddle.vision.transforms import ContrastTransform

            transform = ContrastTransform(0.4)
@@ -682,7 +682,7 @@ class SaturationTransform(object):

            import numpy as np
-           from paddle.incubate.hapi.vision.transforms import SaturationTransform
+           from paddle.vision.transforms import SaturationTransform

            transform = SaturationTransform(0.4)
@@ -723,7 +723,7 @@ class HueTransform(object):

            import numpy as np
-           from paddle.incubate.hapi.vision.transforms import HueTransform
+           from paddle.vision.transforms import HueTransform

            transform = HueTransform(0.4)
@@ -775,7 +775,7 @@ class ColorJitter(object):

            import numpy as np
-           from paddle.incubate.hapi.vision.transforms import ColorJitter
+           from paddle.vision.transforms import ColorJitter

            transform = ColorJitter(0.4)
@@ -822,7 +822,7 @@ class RandomCrop(object):

            import numpy as np
-           from paddle.incubate.hapi.vision.transforms import RandomCrop
+           from paddle.vision.transforms import RandomCrop

            transform = RandomCrop(224)
@@ -909,7 +909,7 @@ class RandomErasing(object):

            import numpy as np
-           from paddle.incubate.hapi.vision.transforms import RandomCrop
+           from paddle.vision.transforms import RandomCrop

            transform = RandomCrop(224)
@@ -995,7 +995,7 @@ class Pad(object):

            import numpy as np
-           from paddle.incubate.hapi.vision.transforms import Pad
+           from paddle.vision.transforms import Pad

            transform = Pad(2)
@@ -1051,7 +1051,7 @@ class RandomRotate(object):

            import numpy as np
-           from paddle.incubate.hapi.vision.transforms import RandomRotate
+           from paddle.vision.transforms import RandomRotate

            transform = RandomRotate(90)
@@ -1119,7 +1119,7 @@ class Grayscale(object):

            import numpy as np
-           from paddle.incubate.hapi.vision.transforms import Grayscale
+           from paddle.vision.transforms import Grayscale

            transform = Grayscale()
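The class-based transforms move the same way, with no behavior change in this patch. A minimal sketch mirroring the `Compose` docstring example above, via the new public paths (the final indexing line is an assumption that `Flowers` follows the map-style `paddle.io.Dataset` interface, and the dataset download must succeed):

    from paddle.vision.datasets import Flowers
    from paddle.vision.transforms import Compose, ColorJitter, Resize

    # same pipeline as the Compose docstring, via the new import paths
    transform = Compose([ColorJitter(), Resize(size=608)])
    flowers = Flowers(mode='test', transform=transform)

    # fetch one transformed sample to confirm the pipeline runs
    sample = flowers[0]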
diff --git a/python/setup.py.in b/python/setup.py.in
index 5b206296bd641bf909115d1c580518afe85a37b6..64ac2b9b9a4d210c59193e117c6000986bfb07a0 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -188,12 +188,13 @@ packages=['paddle',
          'paddle.fluid.incubate.fleet.parameter_server.ir',
          'paddle.fluid.incubate.fleet.collective',
          'paddle.fluid.incubate.fleet.utils',
-         'paddle.incubate.hapi',
-         'paddle.incubate.hapi.datasets',
-         'paddle.incubate.hapi.vision',
-         'paddle.incubate.hapi.vision.models',
-         'paddle.incubate.hapi.vision.transforms',
-         'paddle.incubate.hapi.text',
+         'paddle.hapi',
+         'paddle.vision',
+         'paddle.vision.models',
+         'paddle.vision.transforms',
+         'paddle.vision.datasets',
+         'paddle.text',
+         'paddle.text.datasets',
          'paddle.incubate',
          'paddle.io',
          'paddle.optimizer',
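With the package list in setup.py.in updated, the renamed modules ship in the wheel and the hapi entry points are re-exported at the package root by this patch's paddle/__init__.py changes. A quick smoke test, assuming an installed build of this change:

    import paddle
    import paddle.vision
    import paddle.text

    # the high-level API now surfaces at the top level
    assert hasattr(paddle, 'Model')
    assert hasattr(paddle, 'callbacks')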