diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index c5be5bc142f23b5f6fdfb8062f3be8e5e764e364..bd92727545647836c02931130a1ad528889ae2bb 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -787,6 +787,15 @@ set +x multiple_card_tests="$multiple_card_tests|^$testcase$" fi else + if [[ "${#single_card_tests}" -gt 3000 ]];then + if [[ "$single_card_tests_1" == "" ]]; then + single_card_tests_1="^$testcase$" + else + single_card_tests_1="$single_card_tests_1|^$testcase$" + fi + continue + fi + if [[ "$single_card_tests" == "" ]]; then single_card_tests="^$testcase$" else @@ -800,6 +809,7 @@ set +x done <<< "$test_cases"; card_test "$single_card_tests" 1 # run cases with single GPU + card_test "$single_card_tests_1" 1 # run cases with single GPU card_test "$multiple_card_tests" 2 # run cases with two GPUs card_test "$exclusive_tests" # run cases exclusively, in this cases would be run with 4/8 GPUs if [[ "$EXIT_CODE" != "0" ]]; then diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 77a752800498de77b35336577914a5415a654aa8..59dfc5c9d0311342fc72d8400a3abddd3f6d778b 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -96,6 +96,7 @@ if (WITH_TESTING) add_subdirectory(paddle/fluid/tests) add_subdirectory(paddle/fluid/contrib/tests) add_subdirectory(paddle/fluid/contrib/slim/tests) + add_subdirectory(paddle/incubate/hapi/tests) endif() install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR} DESTINATION opt/paddle/share/wheels diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index ad4a6d64d10adb4deb58f8e0c67c56b4f96afadf..fd6544f9dfbeede98285ef400ddd5c061ce9eb3a 100644 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -202,6 +202,9 @@ from .tensor.stat import var #DEFINE_ALIAS # from .tensor.tensor import Tensor #DEFINE_ALIAS # from .tensor.tensor import LoDTensor #DEFINE_ALIAS # from .tensor.tensor import LoDTensorArray #DEFINE_ALIAS + +from . 
import incubate +from .incubate import hapi from .fluid.dygraph.base import enable_dygraph #DEFINE_ALIAS from .fluid.dygraph.base import disable_dygraph #DEFINE_ALIAS from .fluid.framework import in_dygraph_mode #DEFINE_ALIAS diff --git a/python/paddle/incubate/__init__.py b/python/paddle/incubate/__init__.py index 76e0e91197f15f94d14ae3c20094f6813f725c38..e6888ebc8f441e137d2483795817d6f08719c075 100644 --- a/python/paddle/incubate/__init__.py +++ b/python/paddle/incubate/__init__.py @@ -11,3 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +from . import hapi + +__all__ = [] +__all__ += hapi.__all__ diff --git a/python/paddle/incubate/hapi/__init__.py b/python/paddle/incubate/hapi/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0b2321976edff0e2c13c614406fcaaa9976cc797 --- /dev/null +++ b/python/paddle/incubate/hapi/__init__.py @@ -0,0 +1,38 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import logger +from . import progressbar +from . import callbacks +from . import download +from . import model +from . import metrics +from . import loss +from . import datasets +from . import distributed +from . 
import vision + +logger.setup_logger() + +__all__ = [ + 'callbacks', + 'datasets', + 'distributed', + 'download', + 'metrics', + 'loss', + 'vision', +] + +__all__ += model.__all__ diff --git a/python/paddle/incubate/hapi/callbacks.py b/python/paddle/incubate/hapi/callbacks.py new file mode 100644 index 0000000000000000000000000000000000000000..7b3c41584151c252c65b6cf95f9738b82c78731e --- /dev/null +++ b/python/paddle/incubate/hapi/callbacks.py @@ -0,0 +1,484 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
from paddle.fluid.dygraph.parallel import ParallelEnv

from .progressbar import ProgressBar

__all__ = ['Callback', 'ProgBarLogger', 'ModelCheckpoint']


def config_callbacks(callbacks=None,
                     model=None,
                     batch_size=None,
                     epochs=None,
                     steps=None,
                     log_freq=2,
                     verbose=2,
                     save_freq=1,
                     save_dir=None,
                     metrics=None,
                     mode='train'):
    """Assemble the CallbackList that drives a train/eval/test loop.

    A ``ProgBarLogger`` is prepended when none was supplied and ``verbose``
    is truthy; a ``ModelCheckpoint`` is appended when none was supplied.
    The resulting list is bound to ``model`` and receives a params dict with
    the keys 'batch_size', 'epochs', 'steps', 'verbose' and 'metrics'.

    Args:
        callbacks (Callback|list|tuple|None): User callbacks. A single
            callback is wrapped into a list.
        model: Object handed to every callback through ``set_model``.
        batch_size, epochs, steps: Forwarded unchanged in the params dict.
        log_freq (int): ``log_freq`` of the auto-created ProgBarLogger.
        verbose (int): Verbosity; also forwarded in the params dict.
        save_freq (int): ``save_freq`` of the auto-created ModelCheckpoint.
        save_dir (str|None): ``save_dir`` of the auto-created ModelCheckpoint.
        metrics (list|None): Metric names; replaced by ``[]`` when
            ``mode == 'test'``.
        mode (str): Running mode; only 'test' is treated specially here.

    Returns:
        CallbackList: the configured callback container.
    """
    cbks = callbacks or []
    # Always work on a list: the concatenations below are list + list, so a
    # tuple passed by the caller used to raise TypeError.
    cbks = list(cbks) if isinstance(cbks, (list, tuple)) else [cbks]
    if not any(isinstance(k, ProgBarLogger) for k in cbks) and verbose:
        cbks = [ProgBarLogger(log_freq, verbose=verbose)] + cbks

    if not any(isinstance(k, ModelCheckpoint) for k in cbks):
        cbks = cbks + [ModelCheckpoint(save_freq, save_dir)]

    cbk_list = CallbackList(cbks)
    cbk_list.set_model(model)
    # Parenthesised for readability; this is how the original expression
    # already parsed (the conditional binds looser than ``or``).
    metrics = (metrics or []) if mode != 'test' else []
    params = {
        'batch_size': batch_size,
        'epochs': epochs,
        'steps': steps,
        'verbose': verbose,
        'metrics': metrics,
    }
    cbk_list.set_params(params)
    return cbk_list


class CallbackList(object):
    """Container that fans every hook call out to a list of callbacks."""

    def __init__(self, callbacks=None):
        # Copy so later ``append`` calls do not mutate the caller's list;
        # ``None`` (the declared default) means "no callbacks".
        self.callbacks = list(callbacks) if callbacks is not None else []
        self.params = {}
        self.model = None

    def append(self, callback):
        """Add one more callback at the end of the list."""
        self.callbacks.append(callback)

    def __iter__(self):
        return iter(self.callbacks)

    def set_params(self, params):
        """Record ``params`` and pass it to every callback."""
        self.params = params
        for c in self.callbacks:
            c.set_params(params)

    def set_model(self, model):
        """Record ``model`` and pass it to every callback."""
        self.model = model
        for c in self.callbacks:
            c.set_model(model)

    def _call(self, name, *args):
        # Dispatch by hook name; every callback must define the hook
        # (the Callback base class supplies no-op versions of all of them).
        for c in self.callbacks:
            func = getattr(c, name)
            func(*args)

    def _check_mode(self, mode):
        assert mode in ['train', 'eval', 'test'], \
            'mode should be train, eval or test'

    def on_begin(self, mode, logs=None):
        """Invoke ``on_<mode>_begin`` on every callback."""
        self._check_mode(mode)
        name = 'on_{}_begin'.format(mode)
        self._call(name, logs)

    def on_end(self, mode, logs=None):
        """Invoke ``on_<mode>_end`` on every callback."""
        self._check_mode(mode)
        name = 'on_{}_end'.format(mode)
        self._call(name, logs)

    def on_epoch_begin(self, epoch=None, logs=None):
        """Invoke ``on_epoch_begin`` on every callback."""
        self._call('on_epoch_begin', epoch, logs)

    def on_epoch_end(self, epoch=None, logs=None):
        """Invoke ``on_epoch_end`` on every callback."""
        self._call('on_epoch_end', epoch, logs)

    def on_batch_begin(self, mode, step=None, logs=None):
        """Invoke ``on_<mode>_batch_begin`` on every callback."""
        self._check_mode(mode)
        name = 'on_{}_batch_begin'.format(mode)
        self._call(name, step, logs)

    def on_batch_end(self, mode, step=None, logs=None):
        """Invoke ``on_<mode>_batch_end`` on every callback."""
        self._check_mode(mode)
        name = 'on_{}_batch_end'.format(mode)
        self._call(name, step, logs)


class Callback(object):
    """
    Base class used to build new callbacks.

    Every hook is a no-op here; subclasses override the ones they need.

    Examples:

        .. code-block:: python

            from paddle.incubate.hapi.callbacks import Callback

            # build a simple model checkpoint callback
            class ModelCheckpoint(Callback):
                def __init__(self, save_freq=1, save_dir=None):
                    self.save_freq = save_freq
                    self.save_dir = save_dir

                def on_epoch_end(self, epoch, logs=None):
                    if self.model is not None and epoch % self.save_freq == 0:
                        path = '{}/{}'.format(self.save_dir, epoch)
                        print('save checkpoint at {}'.format(path))
                        self.model.save(path)

    """

    def __init__(self):
        self.model = None
        self.params = {}

    def set_params(self, params):
        """
        Set parameters, which is a dict. The keys contain:

          - 'batch_size': an integer. Number of samples per batch.
          - 'epochs': an integer. Number of epochs.
          - 'steps': an integer. Number of steps of one epoch.
          - 'verbose': an integer. Verbose mode is 0, 1 or 2.
             0 = silent, 1 = progress bar, 2 = one line per epoch.
          - 'metrics': a list of str. Names of metrics, including 'loss'
              and the names of hapi.Metric.
        """
        self.params = params

    def set_model(self, model):
        """model is instance of hapi.Model.
        """
        self.model = model

    def on_train_begin(self, logs=None):
        """Called at the start of training.

        Args:
            logs (dict): The logs is a dict or None.
        """

    def on_train_end(self, logs=None):
        """Called at the end of training.

        Args:
            logs (dict): The logs is a dict or None. The keys of logs
                passed by hapi.Model contains 'loss', metric names and
                `batch_size`.
        """

    def on_eval_begin(self, logs=None):
        """Called at the start of evaluation.

        Args:
            logs (dict): The logs is a dict or None. The keys of logs
                passed by hapi.Model contains 'steps' and 'metrics',
                The `steps` is number of total steps of validation dataset.
                The `metrics` is a list of str including 'loss' and the names
                of hapi.Metric.
        """

    def on_eval_end(self, logs=None):
        """Called at the end of evaluation.

        Args:
            logs (dict): The logs is a dict or None. The `logs` passed by
                hapi.Model is a dict contains 'loss', metrics and 'batch_size'
                of last batch of validation dataset.
        """

    def on_test_begin(self, logs=None):
        """Called at the beginning of predict.

        Args:
            logs (dict): The logs is a dict or None.
        """

    def on_test_end(self, logs=None):
        """Called at the end of predict.

        Args:
            logs (dict): The logs is a dict or None.
        """

    def on_epoch_begin(self, epoch, logs=None):
        """Called at the beginning of each epoch.

        Args:
            epoch (int): The index of epoch.
            logs (dict): The logs is a dict or None. The `logs` passed by
                hapi.Model is None.
        """

    def on_epoch_end(self, epoch, logs=None):
        """Called at the end of each epoch.

        Args:
            epoch (int): The index of epoch.
            logs (dict): The logs is a dict or None. The `logs` passed by
                hapi.Model is a dict, contains 'loss', metrics and 'batch_size'
                of last batch.
        """

    def on_train_batch_begin(self, step, logs=None):
        """Called at the beginning of each batch in training.

        Args:
            step (int): The index of step (or iteration).
            logs (dict): The logs is a dict or None. The `logs` passed by
                hapi.Model is empty.
        """

    def on_train_batch_end(self, step, logs=None):
        """Called at the end of each batch in training.

        Args:
            step (int): The index of step (or iteration).
            logs (dict): The logs is a dict or None. The `logs` passed by
                hapi.Model is a dict, contains 'loss', metrics and 'batch_size'
                of current batch.
        """

    def on_eval_batch_begin(self, step, logs=None):
        """Called at the beginning of each batch in evaluation.

        Args:
            step (int): The index of step (or iteration).
            logs (dict): The logs is a dict or None. The `logs` passed by
                hapi.Model is empty.
        """

    def on_eval_batch_end(self, step, logs=None):
        """Called at the end of each batch in evaluation.

        Args:
            step (int): The index of step (or iteration).
            logs (dict): The logs is a dict or None. The `logs` passed by
                hapi.Model is a dict, contains 'loss', metrics and 'batch_size'
                of current batch.
        """

    def on_test_batch_begin(self, step, logs=None):
        """Called at the beginning of each batch in predict.

        Args:
            step (int): The index of step (or iteration).
            logs (dict): The logs is a dict or None.
        """

    def on_test_batch_end(self, step, logs=None):
        """Called at the end of each batch in predict.

        Args:
            step (int): The index of step (or iteration).
            logs (dict): The logs is a dict or None.
        """


class ProgBarLogger(Callback):
    """Logger callback function

    Prints progress through ``ProgressBar`` on the process whose
    ``ParallelEnv().local_rank`` is 0.

    Args:
        log_freq (int): The frequency, in number of steps, the logs such as `loss`,
            `metrics` are printed. Default: 1.
        verbose (int): The verbosity mode, should be 0, 1, or 2.
            0 = silent, 1 = progress bar, 2 = one line per epoch. Default: 2.

    Examples:
        .. code-block:: python

            import numpy as np
            from paddle import fluid
            from paddle.incubate.hapi.metrics import Accuracy
            from paddle.incubate.hapi.loss import CrossEntropy
            from paddle.incubate.hapi.datasets import MNIST
            from paddle.incubate.hapi.vision.models import LeNet
            from paddle.incubate.hapi.callbacks import ProgBarLogger
            from paddle.incubate.hapi.model import Input, set_device

            inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
            labels = [Input([None, 1], 'int64', name='label')]

            train_dataset = MNIST(mode='train')

            model = LeNet()

            optim = fluid.optimizer.Adam(0.001)
            model.prepare(optimizer=optim,
                        loss_function=CrossEntropy(),
                        metrics=Accuracy(),
                        inputs=inputs,
                        labels=labels)

            callback = ProgBarLogger(log_freq=10)
            model.fit(train_dataset, batch_size=64, callbacks=callback)
    """

    def __init__(self, log_freq=1, verbose=2):
        # Initialise ``model`` / ``params`` so the hooks can read them even
        # if set_model / set_params has not been called yet.
        super(ProgBarLogger, self).__init__()
        self.epochs = None
        self.steps = None
        self.progbar = None
        self.verbose = verbose
        self.log_freq = log_freq

    def _is_print(self):
        # Only the rank-0 process prints, and only when verbose is truthy.
        return self.verbose and ParallelEnv().local_rank == 0

    def on_train_begin(self, logs=None):
        self.epochs = self.params['epochs']
        assert self.epochs
        self.train_metrics = self.params['metrics']
        assert self.train_metrics

    def on_epoch_begin(self, epoch=None, logs=None):
        self.steps = self.params['steps']
        self.epoch = epoch
        self.train_step = 0
        # ``epoch`` defaults to None; skip the banner instead of failing on
        # ``None + 1``.
        if self.epochs and epoch is not None and self._is_print():
            print('Epoch %d/%d' % (epoch + 1, self.epochs))
        self.train_progbar = ProgressBar(num=self.steps, verbose=self.verbose)

    def _updates(self, logs, mode):
        """Push the metrics found in ``logs`` to the progress bar of ``mode``.

        Reads ``<mode>_metrics``, ``<mode>_progbar`` and ``<mode>_step`` from
        the instance, so the matching ``*_begin`` hook must have run first.
        """
        metrics = getattr(self, '%s_metrics' % (mode))
        progbar = getattr(self, '%s_progbar' % (mode))
        steps = getattr(self, '%s_step' % (mode))

        values = [(k, logs[k]) for k in metrics if k in logs]

        progbar.update(steps, values)

    def on_train_batch_end(self, step, logs=None):
        logs = logs or {}
        self.train_step += 1

        if self._is_print() and self.train_step % self.log_freq == 0:
            # The last step of a known-length epoch is reported by
            # on_epoch_end instead.
            if self.steps is None or self.train_step < self.steps:
                self._updates(logs, 'train')

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        if self._is_print() and (self.steps is not None):
            self._updates(logs, 'train')

    def on_eval_begin(self, logs=None):
        # ``logs`` defaults to None; treat it as empty like the other hooks.
        logs = logs or {}
        self.eval_steps = logs.get('steps', None)
        self.eval_metrics = logs.get('metrics', [])
        self.eval_step = 0
        self.evaled_samples = 0

        self.eval_progbar = ProgressBar(
            num=self.eval_steps, verbose=self.verbose)
        if self._is_print():
            print('Eval begin...')

    def on_eval_batch_end(self, step, logs=None):
        logs = logs or {}
        self.eval_step += 1
        samples = logs.get('batch_size', 1)
        self.evaled_samples += samples

        if self._is_print() and self.eval_step % self.log_freq == 0:
            if self.eval_steps is None or self.eval_step < self.eval_steps:
                self._updates(logs, 'eval')

    def on_test_begin(self, logs=None):
        # ``logs`` defaults to None; treat it as empty like the other hooks.
        logs = logs or {}
        self.test_steps = logs.get('steps', None)
        self.test_metrics = logs.get('metrics', [])
        self.test_step = 0
        self.tested_samples = 0
        self.test_progbar = ProgressBar(
            num=self.test_steps, verbose=self.verbose)
        if self._is_print():
            print('Predict begin...')

    def on_test_batch_end(self, step, logs=None):
        logs = logs or {}
        self.test_step += 1
        samples = logs.get('batch_size', 1)
        self.tested_samples += samples

        if self.test_step % self.log_freq == 0 and self._is_print():
            if self.test_steps is None or self.test_step < self.test_steps:
                self._updates(logs, 'test')

    def on_eval_end(self, logs=None):
        logs = logs or {}
        if self._is_print() and (self.eval_steps is not None):
            self._updates(logs, 'eval')
            print('Eval samples: %d' % (self.evaled_samples))

    def on_test_end(self, logs=None):
        logs = logs or {}
        if self._is_print():
            if self.test_step % self.log_freq != 0 or self.verbose == 1:
                self._updates(logs, 'test')
            print('Predict samples: %d' % (self.tested_samples))


class ModelCheckpoint(Callback):
    """Model checkpoint callback function

    Saves through ``self.model.save(path)`` on the process whose
    ``ParallelEnv().local_rank`` is 0: ``<save_dir>/<epoch>`` at the end of
    matching epochs and ``<save_dir>/final`` at the end of training.

    Args:
        save_freq(int): The frequency, in number of epochs, the model checkpoint
            are saved. Default: 1.
        save_dir(str|None): The directory to save checkpoint during training.
            If None, will not save checkpoint. Default: None.

    Examples:
        .. code-block:: python

            import numpy as np
            from paddle import fluid
            from paddle.incubate.hapi.metrics import Accuracy
            from paddle.incubate.hapi.loss import CrossEntropy
            from paddle.incubate.hapi.datasets import MNIST

            from paddle.incubate.hapi.vision.models import LeNet
            from paddle.incubate.hapi.callbacks import ModelCheckpoint
            from paddle.incubate.hapi.model import Input, set_device

            inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
            labels = [Input([None, 1], 'int64', name='label')]

            train_dataset = MNIST(mode='train')

            model = LeNet()

            optim = fluid.optimizer.Adam(0.001)
            model.prepare(optimizer=optim,
                        loss_function=CrossEntropy(),
                        metrics=Accuracy(),
                        inputs=inputs,
                        labels=labels)

            callback = ModelCheckpoint(save_dir='./temp')
            model.fit(train_dataset, batch_size=64, callbacks=callback)
    """

    def __init__(self, save_freq=1, save_dir=None):
        # Initialise ``model`` / ``params`` so _is_save() cannot hit a missing
        # attribute when set_model was never called.
        super(ModelCheckpoint, self).__init__()
        self.save_freq = save_freq
        self.save_dir = save_dir

    def on_epoch_begin(self, epoch=None, logs=None):
        self.epoch = epoch

    def _is_save(self):
        # Save only with a model, a target directory, and on rank 0.
        return self.model and self.save_dir and ParallelEnv().local_rank == 0

    def on_epoch_end(self, epoch, logs=None):
        # Use the ``epoch`` argument directly rather than the value cached by
        # on_epoch_begin, so this hook does not depend on call order.
        if self._is_save() and epoch % self.save_freq == 0:
            path = '{}/{}'.format(self.save_dir, epoch)
            print('save checkpoint at {}'.format(path))
            self.model.save(path)

    def on_train_end(self, logs=None):
        if self._is_save():
            path = '{}/final'.format(self.save_dir)
            print('save checkpoint at {}'.format(path))
            self.model.save(path)


# (patch header of the next file, continued on the following source line)
# diff --git a/python/paddle/incubate/hapi/datasets/__init__.py b/python/paddle/incubate/hapi/datasets/__init__.py
# new file mode 100644 index
0000000000000000000000000000000000000000..fc5df6401992def4bc37329794e534a832924da3 --- /dev/null +++ b/python/paddle/incubate/hapi/datasets/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import folder +from . import mnist +from . import flowers + +from .folder import * +from .mnist import * +from .flowers import * + +__all__ = folder.__all__ \ + + mnist.__all__ \ + + flowers.__all__ diff --git a/python/paddle/incubate/hapi/datasets/flowers.py b/python/paddle/incubate/hapi/datasets/flowers.py new file mode 100644 index 0000000000000000000000000000000000000000..6f56cc82c1cba800002d82cc8a2bd5ddae619f9e --- /dev/null +++ b/python/paddle/incubate/hapi/datasets/flowers.py @@ -0,0 +1,129 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import os +import io +import tarfile +import numpy as np +import scipy.io as scio +from PIL import Image + +from paddle.io import Dataset +from .utils import _check_exists_and_download + +__all__ = ["Flowers"] + +DATA_URL = 'http://paddlemodels.bj.bcebos.com/flowers/102flowers.tgz' +LABEL_URL = 'http://paddlemodels.bj.bcebos.com/flowers/imagelabels.mat' +SETID_URL = 'http://paddlemodels.bj.bcebos.com/flowers/setid.mat' +DATA_MD5 = '52808999861908f626f3c1f4e79d11fa' +LABEL_MD5 = 'e0620be6f572b9609742df49c70aed4d' +SETID_MD5 = 'a5357ecc9cb78c4bef273ce3793fc85c' + +# In official 'readme', tstid is the flag of test data +# and trnid is the flag of train data. But test data is more than train data. +# So we exchange the train data and test data. +MODE_FLAG_MAP = {'train': 'tstid', 'test': 'trnid', 'valid': "valid"} + + +class Flowers(Dataset): + """ + Implement of flowers dataset + + Args: + data_file(str): path to data file, can be set None if + :attr:`download` is True. Default None + label_file(str): path to label file, can be set None if + :attr:`download` is True. Default None + setid_file(str): path to subset index file, can be set + None if :attr:`download` is True. Default None + mode(str): 'train', 'valid' or 'test' mode. Default 'train'. + download(bool): whether auto download mnist dataset if + :attr:`image_path`/:attr:`label_path` unset. Default + True + + Examples: + + .. 
code-block:: python + + from paddle.incubate.hapi.datasets import Flowers + + flowers = Flowers(mode='test') + + for i in range(len(flowers)): + sample = flowers[i] + print(sample[0].shape, sample[1]) + + """ + + def __init__(self, + data_file=None, + label_file=None, + setid_file=None, + mode='train', + transform=None, + download=True): + assert mode.lower() in ['train', 'valid', 'test'], \ + "mode should be 'train', 'valid' or 'test', but got {}".format(mode) + self.flag = MODE_FLAG_MAP[mode.lower()] + + self.data_file = data_file + if self.data_file is None: + assert download, "data_file not set and auto download disabled" + self.data_file = _check_exists_and_download( + data_file, DATA_URL, DATA_MD5, 'flowers', download) + + self.label_file = label_file + if self.label_file is None: + assert download, "label_file not set and auto download disabled" + self.label_file = _check_exists_and_download( + label_file, LABEL_URL, LABEL_MD5, 'flowers', download) + + self.setid_file = setid_file + if self.setid_file is None: + assert download, "setid_file not set and auto download disabled" + self.setid_file = _check_exists_and_download( + setid_file, SETID_URL, SETID_MD5, 'flowers', download) + + self.transform = transform + + # read dataset into memory + self._load_anno() + + def _load_anno(self): + self.name2mem = {} + self.data_tar = tarfile.open(self.data_file) + for ele in self.data_tar.getmembers(): + self.name2mem[ele.name] = ele + + self.labels = scio.loadmat(self.label_file)['labels'][0] + self.indexes = scio.loadmat(self.setid_file)[self.flag][0] + + def __getitem__(self, idx): + index = self.indexes[idx] + label = np.array([self.labels[index - 1]]) + img_name = "jpg/image_%05d.jpg" % index + img_ele = self.name2mem[img_name] + image = self.data_tar.extractfile(img_ele).read() + image = np.array(Image.open(io.BytesIO(image))) + + if self.transform is not None: + image = self.transform(image) + + return image, label.astype('int64') + + def __len__(self): + return 
len(self.indexes) diff --git a/python/paddle/incubate/hapi/datasets/folder.py b/python/paddle/incubate/hapi/datasets/folder.py new file mode 100644 index 0000000000000000000000000000000000000000..358e7681eb8e64364600732f0399e6b97f0d64e0 --- /dev/null +++ b/python/paddle/incubate/hapi/datasets/folder.py @@ -0,0 +1,299 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 + +from paddle.io import Dataset + +__all__ = ["DatasetFolder", "ImageFolder"] + + +def has_valid_extension(filename, extensions): + """Checks if a file is a vilid extension. 
+ + Args: + filename (str): path to a file + extensions (tuple of str): extensions to consider (lowercase) + + Returns: + bool: True if the filename ends with one of given extensions + """ + return filename.lower().endswith(extensions) + + +def make_dataset(dir, class_to_idx, extensions, is_valid_file=None): + images = [] + dir = os.path.expanduser(dir) + + if extensions is not None: + + def is_valid_file(x): + return has_valid_extension(x, extensions) + + for target in sorted(class_to_idx.keys()): + d = os.path.join(dir, target) + if not os.path.isdir(d): + continue + for root, _, fnames in sorted(os.walk(d, followlinks=True)): + for fname in sorted(fnames): + path = os.path.join(root, fname) + if is_valid_file(path): + item = (path, class_to_idx[target]) + images.append(item) + + return images + + +class DatasetFolder(Dataset): + """A generic data loader where the samples are arranged in this way: + + root/class_a/1.ext + root/class_a/2.ext + root/class_a/3.ext + + root/class_b/123.ext + root/class_b/456.ext + root/class_b/789.ext + + Args: + root (string): Root directory path. + loader (callable|optional): A function to load a sample given its path. + extensions (tuple[str]|optional): A list of allowed extensions. + both extensions and is_valid_file should not be passed. + transform (callable|optional): A function/transform that takes in + a sample and returns a transformed version. + is_valid_file (callable|optional): A function that takes path of a file + and check if the file is a valid file (used to check of corrupt files) + both extensions and is_valid_file should not be passed. + + Attributes: + classes (list): List of the class names. + class_to_idx (dict): Dict with items (class_name, class_index). + samples (list): List of (sample path, class_index) tuples + targets (list): The class_index value for each image in the dataset + + Example: + + .. 
code-block:: python + + import os + import cv2 + import tempfile + import shutil + import numpy as np + from paddle.incubate.hapi.datasets import DatasetFolder + + def make_fake_dir(): + data_dir = tempfile.mkdtemp() + + for i in range(2): + sub_dir = os.path.join(data_dir, 'class_' + str(i)) + if not os.path.exists(sub_dir): + os.makedirs(sub_dir) + for j in range(2): + fake_img = (np.random.random((32, 32, 3)) * 255).astype('uint8') + cv2.imwrite(os.path.join(sub_dir, str(j) + '.jpg'), fake_img) + return data_dir + + temp_dir = make_fake_dir() + data_folder = DatasetFolder(temp_dir) + + for items in data_folder: + break + + shutil.rmtree(temp_dir) + """ + + def __init__(self, + root, + loader=None, + extensions=None, + transform=None, + is_valid_file=None): + self.root = root + self.transform = transform + if extensions is None: + extensions = IMG_EXTENSIONS + classes, class_to_idx = self._find_classes(self.root) + samples = make_dataset(self.root, class_to_idx, extensions, + is_valid_file) + if len(samples) == 0: + raise (RuntimeError( + "Found 0 files in subfolders of: " + self.root + "\n" + "Supported extensions are: " + ",".join(extensions))) + + self.loader = cv2_loader if loader is None else loader + self.extensions = extensions + + self.classes = classes + self.class_to_idx = class_to_idx + self.samples = samples + self.targets = [s[1] for s in samples] + + def _find_classes(self, dir): + """ + Finds the class folders in a dataset. + + Args: + dir (string): Root directory path. + + Returns: + tuple: (classes, class_to_idx) where classes are relative to (dir), + and class_to_idx is a dictionary. 
+ + """ + if sys.version_info >= (3, 5): + # Faster and available in Python 3.5 and above + classes = [d.name for d in os.scandir(dir) if d.is_dir()] + else: + classes = [ + d for d in os.listdir(dir) + if os.path.isdir(os.path.join(dir, d)) + ] + classes.sort() + class_to_idx = {classes[i]: i for i in range(len(classes))} + return classes, class_to_idx + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: (sample, target) where target is class_index of the target class. + """ + path, target = self.samples[index] + sample = self.loader(path) + if self.transform is not None: + sample = self.transform(sample) + + return sample, target + + def __len__(self): + return len(self.samples) + + +IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', + '.tiff', '.webp') + + +def cv2_loader(path): + return cv2.imread(path) + + +class ImageFolder(Dataset): + """A generic data loader where the samples are arranged in this way: + + root/1.ext + root/2.ext + root/sub_dir/3.ext + + Args: + root (string): Root directory path. + loader (callable, optional): A function to load a sample given its path. + extensions (tuple[string], optional): A list of allowed extensions. + both extensions and is_valid_file should not be passed. + transform (callable, optional): A function/transform that takes in + a sample and returns a transformed version. + is_valid_file (callable, optional): A function that takes path of a file + and check if the file is a valid file (used to check of corrupt files) + both extensions and is_valid_file should not be passed. + + Attributes: + samples (list): List of sample path + + Example: + + .. 
code-block:: python + + import os + import cv2 + import tempfile + import shutil + import numpy as np + from paddle.incubate.hapi.datasets import ImageFolder + + def make_fake_dir(): + data_dir = tempfile.mkdtemp() + + for i in range(2): + sub_dir = os.path.join(data_dir, 'class_' + str(i)) + if not os.path.exists(sub_dir): + os.makedirs(sub_dir) + for j in range(2): + fake_img = (np.random.random((32, 32, 3)) * 255).astype('uint8') + cv2.imwrite(os.path.join(sub_dir, str(j) + '.jpg'), fake_img) + return data_dir + + temp_dir = make_fake_dir() + data_folder = ImageFolder(temp_dir) + + for items in data_folder: + break + + shutil.rmtree(temp_dir) + """ + + def __init__(self, + root, + loader=None, + extensions=None, + transform=None, + is_valid_file=None): + self.root = root + if extensions is None: + extensions = IMG_EXTENSIONS + + samples = [] + path = os.path.expanduser(root) + + if extensions is not None: + + def is_valid_file(x): + return has_valid_extension(x, extensions) + + for root, _, fnames in sorted(os.walk(path, followlinks=True)): + for fname in sorted(fnames): + f = os.path.join(root, fname) + if is_valid_file(f): + samples.append(f) + + if len(samples) == 0: + raise (RuntimeError( + "Found 0 files in subfolders of: " + self.root + "\n" + "Supported extensions are: " + ",".join(extensions))) + + self.loader = cv2_loader if loader is None else loader + self.extensions = extensions + self.samples = samples + self.transform = transform + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: (sample, target) where target is class_index of the target class. 
+ """ + path = self.samples[index] + sample = self.loader(path) + if self.transform is not None: + sample = self.transform(sample) + return [sample] + + def __len__(self): + return len(self.samples) diff --git a/python/paddle/incubate/hapi/datasets/mnist.py b/python/paddle/incubate/hapi/datasets/mnist.py new file mode 100644 index 0000000000000000000000000000000000000000..bd48ca1c9668b40ac0379bfeda11a5c056f9fd44 --- /dev/null +++ b/python/paddle/incubate/hapi/datasets/mnist.py @@ -0,0 +1,162 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import os +import gzip +import struct +import numpy as np + +import paddle.dataset.common +from paddle.io import Dataset +from .utils import _check_exists_and_download + +__all__ = ["MNIST"] + +URL_PREFIX = 'https://dataset.bj.bcebos.com/mnist/' +TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz' +TEST_IMAGE_MD5 = '9fb629c4189551a2d022fa330f9573f3' +TEST_LABEL_URL = URL_PREFIX + 't10k-labels-idx1-ubyte.gz' +TEST_LABEL_MD5 = 'ec29112dd5afa0611ce80d1b7f02629c' +TRAIN_IMAGE_URL = URL_PREFIX + 'train-images-idx3-ubyte.gz' +TRAIN_IMAGE_MD5 = 'f68b3c2dcbeaaa9fbdd348bbdeb94873' +TRAIN_LABEL_URL = URL_PREFIX + 'train-labels-idx1-ubyte.gz' +TRAIN_LABEL_MD5 = 'd53e105ee54ea40749a09fcbcd1e9432' + + +class MNIST(Dataset): + """ + Implement of MNIST dataset + + Args: + image_path(str): path to image file, can be set None if + :attr:`download` is True. Default None + label_path(str): path to label file, can be set None if + :attr:`download` is True. Default None + chw_format(bool): If set True, the output shape is [1, 28, 28], + otherwise, output shape is [1, 784]. Default True. + mode(str): 'train' or 'test' mode. Default 'train'. + download(bool): whether auto download mnist dataset if + :attr:`image_path`/:attr:`label_path` unset. Default + True + + Returns: + Dataset: MNIST Dataset. + + Examples: + + .. 
code-block:: python + + from paddle.incubate.hapi.datasets import MNIST + + mnist = MNIST(mode='test') + + for i in range(len(mnist)): + sample = mnist[i] + print(sample[0].shape, sample[1]) + + """ + + def __init__(self, + image_path=None, + label_path=None, + chw_format=True, + mode='train', + transform=None, + download=True): + assert mode.lower() in ['train', 'test'], \ + "mode should be 'train' or 'test', but got {}".format(mode) + self.mode = mode.lower() + self.chw_format = chw_format + self.image_path = image_path + if self.image_path is None: + assert download, "image_path not set and auto download disabled" + image_url = TRAIN_IMAGE_URL if mode == 'train' else TEST_IMAGE_URL + image_md5 = TRAIN_IMAGE_MD5 if mode == 'train' else TEST_IMAGE_MD5 + self.image_path = _check_exists_and_download( + image_path, image_url, image_md5, 'mnist', download) + + self.label_path = label_path + if self.label_path is None: + assert download, "label_path not set and auto download disabled" + label_url = TRAIN_LABEL_URL if mode == 'train' else TEST_LABEL_URL + label_md5 = TRAIN_LABEL_MD5 if mode == 'train' else TEST_LABEL_MD5 + self.label_path = _check_exists_and_download( + label_path, label_url, label_md5, 'mnist', download) + + self.transform = transform + + # read dataset into memory + self._parse_dataset() + + def _parse_dataset(self, buffer_size=100): + self.images = [] + self.labels = [] + with gzip.GzipFile(self.image_path, 'rb') as image_file: + img_buf = image_file.read() + with gzip.GzipFile(self.label_path, 'rb') as label_file: + lab_buf = label_file.read() + + step_label = 0 + offset_img = 0 + # read from Big-endian + # get file info from magic byte + # image file : 16B + magic_byte_img = '>IIII' + magic_img, image_num, rows, cols = struct.unpack_from( + magic_byte_img, img_buf, offset_img) + offset_img += struct.calcsize(magic_byte_img) + + offset_lab = 0 + # label file : 8B + magic_byte_lab = '>II' + magic_lab, label_num = struct.unpack_from(magic_byte_lab, + 
lab_buf, offset_lab) + offset_lab += struct.calcsize(magic_byte_lab) + + while True: + if step_label >= label_num: + break + fmt_label = '>' + str(buffer_size) + 'B' + labels = struct.unpack_from(fmt_label, lab_buf, offset_lab) + offset_lab += struct.calcsize(fmt_label) + step_label += buffer_size + + fmt_images = '>' + str(buffer_size * rows * cols) + 'B' + images_temp = struct.unpack_from(fmt_images, img_buf, + offset_img) + images = np.reshape(images_temp, (buffer_size, rows * + cols)).astype('float32') + offset_img += struct.calcsize(fmt_images) + + images = images / 255.0 + images = images * 2.0 + images = images - 1.0 + + for i in range(buffer_size): + self.images.append(images[i, :]) + self.labels.append( + np.array([labels[i]]).astype('int64')) + + def __getitem__(self, idx): + image, label = self.images[idx], self.labels[idx] + if self.chw_format: + image = np.reshape(image, [1, 28, 28]) + if self.transform is not None: + image = self.transform(image) + return image, label + + def __len__(self): + return len(self.labels) diff --git a/python/paddle/incubate/hapi/datasets/utils.py b/python/paddle/incubate/hapi/datasets/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..171f794ba9df4270727a23cc6cd039a9faa81970 --- /dev/null +++ b/python/paddle/incubate/hapi/datasets/utils.py @@ -0,0 +1,29 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import os +import paddle.dataset.common + + +def _check_exists_and_download(path, url, md5, module_name, download=True): + if path and os.path.exists(path): + return path + + if download: + return paddle.dataset.common.download(url, module_name, md5) + else: + raise ValueError('{} not exists and auto download disabled'.format( + path)) diff --git a/python/paddle/incubate/hapi/distributed.py b/python/paddle/incubate/hapi/distributed.py new file mode 100644 index 0000000000000000000000000000000000000000..585f466ea6a1ef5a3d888b7c46fe2908ffd2c769 --- /dev/null +++ b/python/paddle/incubate/hapi/distributed.py @@ -0,0 +1,254 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import six +import time +import math +import socket +import contextlib +import numpy as np + +from paddle import fluid +from paddle.fluid.layers import collective +from paddle.fluid.dygraph.parallel import ParallelEnv, ParallelStrategy +from paddle.io import BatchSampler + +_parallel_context_initialized = False + +__all__ = ['DistributedBatchSampler'] + + +class DistributedBatchSampler(BatchSampler): + """Sampler that restricts data loading to a subset of the dataset. 
+ + In such case, each process can pass a DistributedBatchSampler instance + as a DataLoader sampler, and load a subset of the original dataset that + is exclusive to it. + + .. note:: + Dataset is assumed to be of constant size. + + Args: + dataset(paddle.io.Dataset): this could be a `paddle.io.Dataset` implement + or other python object which implemented + `__len__` for BatchSampler to get sample + number of data source. + batch_size(int): sample indice number in a mini-batch indices. + shuffle(bool): whther to shuffle indices order before genrating + batch indices. Default False. + drop_last(bool): whether drop the last incomplete batch dataset size + is not divisible by the batch size. Default False + + Examples: + .. code-block:: python + + import numpy as np + + from paddle.incubate.hapi.datasets import MNIST + from paddle.incubate.hapi.distributed import DistributedBatchSampler + + class MnistDataset(MNIST): + def __init__(self, mode, return_label=True): + super(MnistDataset, self).__init__(mode=mode) + self.return_label = return_label + + def __getitem__(self, idx): + img = np.reshape(self.images[idx], [1, 28, 28]) + if self.return_label: + return img, np.array(self.labels[idx]).astype('int64') + return img, + + def __len__(self): + return len(self.images) + + train_dataset = MnistDataset(mode='train') + dist_train_dataloader = DistributedBatchSampler(train_dataset, batch_size=64) + + for data in dist_train_dataloader: + # do something + break + """ + + def __init__(self, dataset, batch_size, shuffle=False, drop_last=False): + self.dataset = dataset + + assert isinstance(batch_size, int) and batch_size > 0, \ + "batch_size should be a positive integer" + self.batch_size = batch_size + assert isinstance(shuffle, bool), \ + "shuffle should be a boolean value" + self.shuffle = shuffle + assert isinstance(drop_last, bool), \ + "drop_last should be a boolean number" + + self.drop_last = drop_last + self.nranks = ParallelEnv().nranks + self.local_rank = 
ParallelEnv().local_rank + self.epoch = 0 + self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.nranks)) + self.total_size = self.num_samples * self.nranks + + def __iter__(self): + num_samples = len(self.dataset) + indices = np.arange(num_samples).tolist() + indices += indices[:(self.total_size - len(indices))] + assert len(indices) == self.total_size + if self.shuffle: + np.random.RandomState(self.epoch).shuffle(indices) + self.epoch += 1 + + # subsample + def _get_indices_by_batch_size(indices): + subsampled_indices = [] + last_batch_size = self.total_size % (self.batch_size * self.nranks) + assert last_batch_size % self.nranks == 0 + last_local_batch_size = last_batch_size // self.nranks + + for i in range(self.local_rank * self.batch_size, + len(indices) - last_batch_size, + self.batch_size * self.nranks): + subsampled_indices.extend(indices[i:i + self.batch_size]) + + indices = indices[len(indices) - last_batch_size:] + subsampled_indices.extend(indices[ + self.local_rank * last_local_batch_size:( + self.local_rank + 1) * last_local_batch_size]) + return subsampled_indices + + if self.nranks > 1: + indices = _get_indices_by_batch_size(indices) + + assert len(indices) == self.num_samples + _sample_iter = iter(indices) + + batch_indices = [] + for idx in _sample_iter: + batch_indices.append(idx) + if len(batch_indices) == self.batch_size: + yield batch_indices + batch_indices = [] + if not self.drop_last and len(batch_indices) > 0: + yield batch_indices + + def __len__(self): + num_samples = self.num_samples + num_samples += int(not self.drop_last) * (self.batch_size - 1) + return num_samples // self.batch_size + + def set_epoch(self, epoch): + self.epoch = epoch + + +def _all_gather(x, nranks, ring_id=0, use_calc_stream=True): + return collective._c_allgather( + x, nranks, ring_id=ring_id, use_calc_stream=use_calc_stream) + + +def wait_server_ready(endpoints): + assert not isinstance(endpoints, six.string_types) + while True: + all_ok = True + 
not_ready_endpoints = [] + for ep in endpoints: + ip_port = ep.split(":") + with contextlib.closing( + socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: + sock.settimeout(2) + result = sock.connect_ex((ip_port[0], int(ip_port[1]))) + if result != 0: + all_ok = False + not_ready_endpoints.append(ep) + if not all_ok: + time.sleep(3) + else: + break + + +def init_communicator(program, rank, nranks, wait_port, current_endpoint, + endpoints): + if nranks < 2: + return + other_endpoints = endpoints[:] + other_endpoints.remove(current_endpoint) + if rank == 0 and wait_port: + wait_server_ready(other_endpoints) + block = program.global_block() + nccl_id_var = block.create_var( + name=fluid.unique_name.generate('nccl_id'), + persistable=True, + type=fluid.core.VarDesc.VarType.RAW) + + block.append_op( + type='c_gen_nccl_id', + inputs={}, + outputs={'Out': nccl_id_var}, + attrs={ + 'rank': rank, + 'endpoint': current_endpoint, + 'other_endpoints': other_endpoints + }) + + block.append_op( + type='c_comm_init', + inputs={'X': nccl_id_var}, + outputs={}, + attrs={ + 'nranks': nranks, + 'rank': rank, + 'ring_id': 0, + }) + + +def prepare_distributed_context(place=None): + if place is None: + place = fluid.CUDAPlace(ParallelEnv().dev_id) if ParallelEnv().nranks > 1 \ + else fluid.CUDAPlace(0) + + strategy = ParallelStrategy() + strategy.nranks = ParallelEnv().nranks + strategy.local_rank = ParallelEnv().local_rank + strategy.trainer_endpoints = ParallelEnv().trainer_endpoints + strategy.current_endpoint = ParallelEnv().current_endpoint + + if strategy.nranks < 2: + return + + global _parallel_context_initialized + + if not _parallel_context_initialized and isinstance(place, fluid.CUDAPlace): + + def _init_context(): + communicator_prog = fluid.Program() + init_communicator(communicator_prog, strategy.local_rank, + strategy.nranks, True, strategy.current_endpoint, + strategy.trainer_endpoints) + exe = fluid.Executor(place) + exe.run(communicator_prog) + + if 
fluid.in_dygraph_mode(): + fluid.disable_dygraph() + _init_context() + fluid.enable_dygraph(place) + else: + _init_context() + + else: + assert ("Only support CUDAPlace for now.") + + _parallel_context_initialized = True + return strategy diff --git a/python/paddle/incubate/hapi/download.py b/python/paddle/incubate/hapi/download.py new file mode 100644 index 0000000000000000000000000000000000000000..0c80a68392b941e13880afa980a2a8e1da8acfee --- /dev/null +++ b/python/paddle/incubate/hapi/download.py @@ -0,0 +1,235 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import os.path as osp +import shutil +import requests +import hashlib +import time +from collections import OrderedDict +from paddle.fluid.dygraph.parallel import ParallelEnv + +try: + from tqdm import tqdm +except: + + class tqdm(object): + def __init__(self, total=None): + self.total = total + self.n = 0 + + def update(self, n): + self.n += n + if self.total is None: + sys.stderr.write("\r{0:.1f} bytes".format(self.n)) + else: + sys.stderr.write("\r{0:.1f}%".format(100 * self.n / float( + self.total))) + sys.stderr.flush() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + sys.stderr.write('\n') + + +import logging +logger = logging.getLogger(__name__) + +__all__ = ['get_weights_path_from_url'] + +WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/hapi/weights") + +DOWNLOAD_RETRY_LIMIT = 3 + +nlp_models = OrderedDict(( + ('RoBERTa-zh-base', + 'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_ext_L-12_H-768_A-12.tar.gz' + ), + ('RoBERTa-zh-large', + 'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.tar.gz' + ), + ('ERNIE-v2-en-base', + 'https://ernie.bj.bcebos.com/ERNIE_Base_en_stable-2.0.0.tar.gz'), + ('ERNIE-v2-en-large', + 'https://ernie.bj.bcebos.com/ERNIE_Large_en_stable-2.0.0.tar.gz'), + ('XLNet-cased-base', + 'https://xlnet.bj.bcebos.com/xlnet_cased_L-12_H-768_A-12.tgz'), + ('XLNet-cased-large', + 'https://xlnet.bj.bcebos.com/xlnet_cased_L-24_H-1024_A-16.tgz'), + ('ERNIE-v1-zh-base', + 'https://baidu-nlp.bj.bcebos.com/ERNIE_stable-1.0.1.tar.gz'), + ('ERNIE-v1-zh-base-max-len-512', + 'https://ernie.bj.bcebos.com/ERNIE_1.0_max-len-512.tar.gz'), + ('BERT-en-uncased-large-whole-word-masking', + 'https://bert-models.bj.bcebos.com/wwm_uncased_L-24_H-1024_A-16.tar.gz'), + ('BERT-en-cased-large-whole-word-masking', + 
'https://bert-models.bj.bcebos.com/wwm_cased_L-24_H-1024_A-16.tar.gz'), + ('BERT-en-uncased-base', + 'https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz'), + ('BERT-en-uncased-large', + 'https://bert-models.bj.bcebos.com/uncased_L-24_H-1024_A-16.tar.gz'), + ('BERT-en-cased-base', + 'https://bert-models.bj.bcebos.com/cased_L-12_H-768_A-12.tar.gz'), + ('BERT-en-cased-large', + 'https://bert-models.bj.bcebos.com/cased_L-24_H-1024_A-16.tar.gz'), + ('BERT-multilingual-uncased-base', + 'https://bert-models.bj.bcebos.com/multilingual_L-12_H-768_A-12.tar.gz'), + ('BERT-multilingual-cased-base', + 'https://bert-models.bj.bcebos.com/multi_cased_L-12_H-768_A-12.tar.gz'), + ('BERT-zh-base', + 'https://bert-models.bj.bcebos.com/chinese_L-12_H-768_A-12.tar.gz'), )) + + +def is_url(path): + """ + Whether path is URL. + Args: + path (string): URL string or not. + """ + return path.startswith('http://') or path.startswith('https://') + + +def get_weights_path_from_url(url, md5sum=None): + """Get weights path from WEIGHT_HOME, if not exists, + download it from url. + + Args: + url (str): download url + md5sum (str): md5 sum of download package + + Returns: + str: a local path to save downloaded weights. + + Examples: + .. code-block:: python + + from paddle.incubate.hapi.download import get_weights_path_from_url + + resnet18_pretrained_weight_url = 'https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams' + local_weight_path = get_weights_path_from_url(resnet18_pretrained_weight_url) + + """ + path = get_path_from_url(url, WEIGHTS_HOME, md5sum) + return path + + +def _map_path(url, root_dir): + # parse path after download under root_dir + fname = osp.split(url)[-1] + fpath = fname + return osp.join(root_dir, fpath) + + +def get_path_from_url(url, root_dir, md5sum=None, check_exist=True): + """ Download from given url to root_dir. 
+ if file or directory specified by url is exists under + root_dir, return the path directly, otherwise download + from url and decompress it, return the path. + + Args: + url (str): download url + root_dir (str): root dir for downloading, it should be + WEIGHTS_HOME or DATASET_HOME + md5sum (str): md5 sum of download package + + Returns: + str: a local path to save downloaded models & weights & datasets. + """ + assert is_url(url), "downloading from {} not a url".format(url) + # parse path after download to decompress under root_dir + fullpath = _map_path(url, root_dir) + + if osp.exists(fullpath) and check_exist and _md5check(fullpath, md5sum): + logger.info("Found {}".format(fullpath)) + else: + if ParallelEnv().local_rank == 0: + fullpath = _download(url, root_dir, md5sum) + else: + while not os.path.exists(fullpath): + time.sleep(1) + return fullpath + + +def _download(url, path, md5sum=None): + """ + Download from url, save to path. + + url (str): download url + path (str): download to given path + """ + if not osp.exists(path): + os.makedirs(path) + + fname = osp.split(url)[-1] + fullname = osp.join(path, fname) + retry_cnt = 0 + + while not (osp.exists(fullname) and _md5check(fullname, md5sum)): + if retry_cnt < DOWNLOAD_RETRY_LIMIT: + retry_cnt += 1 + else: + raise RuntimeError("Download from {} failed. 
" + "Retry limit reached".format(url)) + + logger.info("Downloading {} from {}".format(fname, url)) + + req = requests.get(url, stream=True) + if req.status_code != 200: + raise RuntimeError("Downloading from {} failed with code " + "{}!".format(url, req.status_code)) + + # For protecting download interupted, download to + # tmp_fullname firstly, move tmp_fullname to fullname + # after download finished + tmp_fullname = fullname + "_tmp" + total_size = req.headers.get('content-length') + with open(tmp_fullname, 'wb') as f: + if total_size: + with tqdm(total=(int(total_size) + 1023) // 1024) as pbar: + for chunk in req.iter_content(chunk_size=1024): + f.write(chunk) + pbar.update(1) + else: + for chunk in req.iter_content(chunk_size=1024): + if chunk: + f.write(chunk) + shutil.move(tmp_fullname, fullname) + + return fullname + + +def _md5check(fullname, md5sum=None): + if md5sum is None: + return True + + logger.info("File {} md5 checking...".format(fullname)) + md5 = hashlib.md5() + with open(fullname, 'rb') as f: + for chunk in iter(lambda: f.read(4096), b""): + md5.update(chunk) + calc_md5sum = md5.hexdigest() + + if calc_md5sum != md5sum: + logger.info("File {} md5 check failed, {}(calc) != " + "{}(base)".format(fullname, calc_md5sum, md5sum)) + return False + return True diff --git a/python/paddle/incubate/hapi/logger.py b/python/paddle/incubate/hapi/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..d4f18ce0ff738c966f1e237beffc9da366e3ae64 --- /dev/null +++ b/python/paddle/incubate/hapi/logger.py @@ -0,0 +1,71 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import logging + +from paddle.fluid.dygraph.parallel import ParallelEnv + + +def setup_logger(output=None, name="hapi", log_level=logging.INFO): + """ + Initialize logger of hapi and set its verbosity level to "INFO". + + Args: + output (str): a file name or a directory to save log. If None, will not save log file. + If ends with ".txt" or ".log", assumed to be a file name. + Otherwise, logs will be saved to `output/log.txt`. + name (str): the root module name of this logger. Default: 'hapi'. + log_level (enum): log level. eg.'INFO', 'DEBUG', 'ERROR'. Default: logging.INFO. 
+ Returns: + logging.Logger: a logger + """ + logger = logging.getLogger(name) + logger.propagate = False + logger.setLevel(log_level) + + format_str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + # stdout logging: only local rank==0 + local_rank = ParallelEnv().local_rank + if local_rank == 0 and len(logger.handlers) == 0: + ch = logging.StreamHandler(stream=sys.stdout) + ch.setLevel(log_level) + + ch.setFormatter(logging.Formatter(format_str)) + logger.addHandler(ch) + + # file logging if output is not None: all workers + if output is not None: + if output.endswith(".txt") or output.endswith(".log"): + filename = output + else: + filename = os.path.join(output, "log.txt") + + if local_rank > 0: + filename = filename + ".rank{}".format(local_rank) + + if not os.path.exists(os.path.dirname(filename)): + os.makedirs(os.path.dirname(filename)) + + fh = logging.StreamHandler(filename) + fh.setLevel(log_level) + fh.setFormatter(logging.Formatter(format_str)) + logger.addHandler(fh) + + return logger diff --git a/python/paddle/incubate/hapi/loss.py b/python/paddle/incubate/hapi/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..8f2e28477953d7ff7b168b207a7d80b48e9d8611 --- /dev/null +++ b/python/paddle/incubate/hapi/loss.py @@ -0,0 +1,145 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from paddle import fluid +from paddle.fluid.framework import in_dygraph_mode, Variable +from paddle.fluid.dygraph.base import to_variable + +from .utils import to_list + +__all__ = ['Loss', 'CrossEntropy', 'SoftmaxWithCrossEntropy'] + + +class Loss(object): + """ + Base class for loss, encapsulates loss logic and APIs + + Usage: + custom_loss = CustomLoss() + loss = custom_loss(inputs, labels) + + Examples: + .. code-block:: python + + from paddle.incubate.hapi.loss import Loss + from paddle import fluid + + class SoftmaxWithCrossEntropy(Loss): + def __init__(self, average=True): + super(SoftmaxWithCrossEntropy, self).__init__(average) + + def forward(self, outputs, labels): + return [ + fluid.layers.softmax_with_cross_entropy( + o, l, return_softmax=False) for o, l in zip(outputs, labels) + ] + + """ + + def __init__(self, average=True): + super(Loss, self).__init__() + self.average = average + + def forward(self, outputs, labels): + raise NotImplementedError() + + def __call__(self, outputs, labels=None): + labels = to_list(labels) + if in_dygraph_mode() and labels: + labels = [to_variable(l) for l in labels] + losses = to_list(self.forward(to_list(outputs), labels)) + if self.average: + losses = [fluid.layers.reduce_mean(l) for l in losses] + else: + losses = [fluid.layers.reduce_sum(l) for l in losses] + return losses + + +class CrossEntropy(Loss): + """ + Args: + input (list[Variable]): Input tensor, the data type is float32, + float64, int32, int64. + label (list[Variable]): Label tensor, the data type is float32, + float64, int32, int64. + average (bool, optional): Indicate whether to average the loss, Default: True. + Returns: + list[Variable]: The tensor variable storing the cross_entropy_loss of inputs and labels. + + Examples: + .. 
code-block:: python + + from paddle.incubate.hapi.model import Input + from paddle.incubate.hapi.vision.models import LeNet + from paddle.incubate.hapi.loss import CrossEntropy + + inputs = [Input([-1, 1, 28, 28], 'float32', name='image')] + labels = [Input([None, 1], 'int64', name='label')] + + model = LeNet() + loss = CrossEntropy() + model.prepare(loss_function=loss, inputs=inputs, labels=labels) + + """ + + def __init__(self, average=True): + super(CrossEntropy, self).__init__(average) + + def forward(self, outputs, labels): + return [ + fluid.layers.cross_entropy(o, l) for o, l in zip(outputs, labels) + ] + + +class SoftmaxWithCrossEntropy(Loss): + """ + this op combined softmax and cross entropy. + Args: + input (list[Variable]): Input tensor, the data type is float32, + float64, int32, int64. + label (list[Variable]): Label tensor, the data type is float32, + float64, int32, int64. + average (bool, optional): Indicate whether to average the loss, Default: True. + Returns: + list[Variable]: The tensor variable storing the cross_entropy_loss of inputs and labels. + + Examples: + .. 
code-block:: python + + from paddle.incubate.hapi.model import Input + from paddle.incubate.hapi.vision.models import LeNet + from paddle.incubate.hapi.loss import SoftmaxWithCrossEntropy + + inputs = [Input([-1, 1, 28, 28], 'float32', name='image')] + labels = [Input([None, 1], 'int64', name='label')] + + model = LeNet(classifier_activation=None) + loss = SoftmaxWithCrossEntropy() + model.prepare(loss_function=loss, inputs=inputs, labels=labels) + """ + + def __init__(self, average=True): + super(SoftmaxWithCrossEntropy, self).__init__(average) + + def forward(self, outputs, labels): + return [ + fluid.layers.softmax_with_cross_entropy( + o, l, return_softmax=False) for o, l in zip(outputs, labels) + ] diff --git a/python/paddle/incubate/hapi/metrics.py b/python/paddle/incubate/hapi/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..7c3d45b05816b54dbdb0fdd9af0d817e796bc034 --- /dev/null +++ b/python/paddle/incubate/hapi/metrics.py @@ -0,0 +1,242 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
@six.add_metaclass(abc.ABCMeta)
class Metric(object):
    r"""
    Base class for metric, encapsulates metric logic and APIs
    Usage:

        m = SomeMetric()
        for prediction, label in ...:
            m.update(prediction, label)
        m.accumulate()

    Advanced usage for :code:`add_metric_op`
    Metric calculation can be accelerated by calculating metric states
    from model outputs and labels with Paddle OPs in :code:`add_metric_op`;
    the metric states are then fetched as numpy arrays and :code:`update`
    is called with the states in numpy format.
    Metric calculated as follows (operations in Model and Metric are
    indicated with curly brackets, while data nodes not):
             inputs & labels              || ------------------
                   |                      ||
                {model}                   ||
                   |                      ||
            outputs & labels              ||
                   |                      ||    tensor data
            {Metric.add_metric_op}        ||
                   |                      ||
          metric states(tensor)           ||
                   |                      ||
            {fetch as numpy}              || ------------------
                   |                      ||
          metric states(numpy)            ||    numpy data
                   |                      ||
            {Metric.update}               \/ ------------------
    Examples:

        For :code:`Accuracy` metric, which takes :code:`pred` and :code:`label`
        as inputs, we can calculate the correct prediction matrix between
        :code:`pred` and :code:`label` in :code:`add_metric_op`.
        For example, if the prediction results contain 10 classes, the
        :code:`pred` shape is [N, 10] and the :code:`label` shape is [N, 1],
        where N is the mini-batch size, and we only need the accuracy of
        top-1 and top-5, we can calculate the correct prediction matrix of
        the top-5 scores of the prediction of each sample as follows; the
        correct prediction matrix shape is [N, 5].
          .. code-block:: python
            def add_metric_op(pred, label):
                # sort prediction and slice the top-5 scores
                pred = fluid.layers.argsort(pred, descending=True)[1][:, :5]
                # calculate whether the predictions are correct
                correct = pred == label
                return fluid.layers.cast(correct, dtype='float32')
        With :code:`add_metric_op`, we move some calculations to OPs (which
        may run on GPU devices and will be faster), and only fetch 1 tensor
        with shape [N, 5] instead of 2 tensors with shapes [N, 10] and [N, 1].
        :code:`update` can be defined as follows:
          .. code-block:: python
            def update(self, correct):
                accs = []
                for i, k in enumerate(self.topk):
                    num_corrects = correct[:, :k].sum()
                    num_samples = len(correct)
                    accs.append(float(num_corrects) / num_samples)
                    self.total[i] += num_corrects
                    self.count[i] += num_samples
                return accs
    """

    def __init__(self):
        pass

    @abc.abstractmethod
    def reset(self):
        """
        Reset states and result
        """
        raise NotImplementedError("function 'reset' not implemented in {}.".
                                  format(self.__class__.__name__))

    @abc.abstractmethod
    def update(self, *args):
        """
        Update states for metric

        Inputs of :code:`update` are the outputs of
        :code:`Metric.add_metric_op`. If :code:`add_metric_op` is not
        defined, the inputs of :code:`update` will be the flattened
        **outputs** of the model and **labels** from data:
        :code:`update(output1, output2, ..., label1, label2,...)`

        see :code:`Metric.add_metric_op`
        """
        raise NotImplementedError("function 'update' not implemented in {}.".
                                  format(self.__class__.__name__))

    @abc.abstractmethod
    def accumulate(self):
        """
        Accumulates statistics, computes and returns the metric value
        """
        raise NotImplementedError(
            "function 'accumulate' not implemented in {}.".format(
                self.__class__.__name__))

    @abc.abstractmethod
    def name(self):
        """
        Returns metric name
        """
        raise NotImplementedError("function 'name' not implemented in {}.".
                                  format(self.__class__.__name__))

    def add_metric_op(self, *args):
        """
        This API is advanced usage to accelerate metric calculation.
        Calculations from the outputs of the model to the states which
        should be updated by the Metric can be defined here, where Paddle
        OPs are also supported. Outputs of this API will be the inputs of
        "Metric.update".

        If :code:`add_metric_op` is defined, it will be called with
        **outputs** of model and **labels** from data as arguments; all
        outputs and labels are concatenated and flattened, and each field
        is passed as a separate argument as follows:
        :code:`add_metric_op(output1, output2, ..., label1, label2,...)`

        If :code:`add_metric_op` is not defined, default behaviour is to pass
        input to output, so output format will be:
        :code:`return output1, output2, ..., label1, label2,...`

        see :code:`Metric.update`
        """
        return args


class Accuracy(Metric):
    """
    Encapsulates accuracy metric logic

    Args:
        topk (tuple of int): the k values for which top-k accuracy is
            tracked. Default is (1, ).
        name (str|None): base name of the metric; 'acc' is used when it
            is None or empty. Default is None.

    Examples:

        .. code-block:: python

            from paddle import fluid
            from paddle.incubate.hapi.metrics import Accuracy
            from paddle.incubate.hapi.loss import CrossEntropy
            from paddle.incubate.hapi.datasets import MNIST
            from paddle.incubate.hapi.model import Input
            from paddle.incubate.hapi.vision.models import LeNet

            fluid.enable_dygraph()

            train_dataset = MNIST(mode='train')

            model = LeNet()
            optim = fluid.optimizer.Adam(
                learning_rate=0.001, parameter_list=model.parameters())

            inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
            labels = [Input([None, 1], 'int64', name='label')]

            model.prepare(
                optim,
                loss_function=CrossEntropy(average=False),
                metrics=Accuracy(),
                inputs=inputs,
                labels=labels)

            model.fit(train_dataset, batch_size=64)

    """

    def __init__(self, topk=(1, ), name=None, *args, **kwargs):
        super(Accuracy, self).__init__(*args, **kwargs)
        self.topk = topk
        # only the `maxk` best predictions are needed to serve every k
        self.maxk = max(topk)
        self._init_name(name)
        self.reset()

    def add_metric_op(self, pred, label, *args):
        """
        Build the "correct prediction" matrix with Paddle OPs.

        Takes the indices of the `maxk` highest scores of :code:`pred`,
        compares them with :code:`label` and returns the comparison
        result cast to float32. Extra positional arguments are ignored.
        """
        pred = fluid.layers.argsort(pred, descending=True)[1][:, :self.maxk]
        correct = pred == label
        return fluid.layers.cast(correct, dtype='float32')

    def update(self, correct, *args):
        """
        Accumulate the statistics of one batch.

        Args:
            correct: the fetched output of :code:`add_metric_op`; it is
                sliced as :code:`correct[:, :k]`, so it must be
                2-dimensional.

        Returns:
            A list with the accuracy of this batch for every k in `topk`.
        """
        accs = []
        for i, k in enumerate(self.topk):
            num_corrects = correct[:, :k].sum()
            num_samples = len(correct)
            accs.append(float(num_corrects) / num_samples)
            self.total[i] += num_corrects
            self.count[i] += num_samples
        return accs

    def reset(self):
        """
        Clear the accumulated number of correct predictions and samples.
        """
        self.total = [0.] * len(self.topk)
        self.count = [0] * len(self.topk)

    def accumulate(self):
        """
        Return the accumulated accuracy for every k in `topk`.

        A slot that has not seen any sample yet (no `update` since the
        last `reset`) yields 0. instead of raising ZeroDivisionError.
        """
        return [
            float(t) / c if c > 0 else 0.
            for t, c in zip(self.total, self.count)
        ]

    def _init_name(self, name):
        """
        Build the list of metric names, one entry per k in `topk`.
        """
        name = name or 'acc'
        if self.maxk != 1:
            self._name = ['{}_top{}'.format(name, k) for k in self.topk]
        else:
            self._name = [name]

    def name(self):
        """
        Return the list of metric names built by `_init_name`.
        """
        return self._name
def set_device(device):
    """
    Create the place that matches the requested device type.

    Args:
        device (str): specify device type, 'cpu' or 'gpu'
            (case-insensitive).

    Returns:
        fluid.CUDAPlace or fluid.CPUPlace: a CUDA place for the device id
        reported by `ParallelEnv` when 'gpu' is requested and Paddle is
        compiled with CUDA, a CPU place otherwise.
    """
    is_supported = isinstance(device, six.string_types) \
        and device.lower() in ['cpu', 'gpu']
    assert is_supported, \
        "Expected device in ['cpu', 'gpu'], but got {}".format(device)

    # silently fall back to CPU when a GPU is requested on a CPU-only build
    if device.lower() == 'gpu' and fluid.is_compiled_with_cuda():
        return fluid.CUDAPlace(ParallelEnv().dev_id)
    return fluid.CPUPlace()


class Input(fluid.dygraph.Layer):
    """
    Description of one network input (or label).

    Stores the shape, dtype and name given at construction; `forward`
    turns them into a `fluid.data` variable.
    """

    def __init__(self, shape=None, dtype=None, name=None):
        super(Input, self).__init__()
        self.shape = shape
        self.dtype = dtype
        self.name = name

    def forward(self):
        """
        Create the `fluid.data` variable described by this object.
        """
        var_name = self.name
        var_shape = self.shape
        var_dtype = self.dtype
        return fluid.data(var_name, shape=var_shape, dtype=var_dtype)
+ """ + + def __init__(self, model): + super(StaticGraphAdapter, self).__init__() + self.model = model + # with `_build_once` gone, parameters are now created in `__init__` + # so we need to keep track of the parameters already created + self._startup_prog = fluid.default_startup_program() + self._orig_prog = fluid.default_main_program() + + self._label_vars = {} # label variables + self._input_vars = {} # label variables + self._endpoints = {} + self._loss_endpoint = None + self._executor = None + self._progs = {} + self._compiled_progs = {} + + self._merge_count = { + 'eval_total': 0, + 'test_total': 0, + 'eval_batch': 0, + 'test_batch': 0 + } + + self._nranks = ParallelEnv().nranks + self._local_rank = ParallelEnv().local_rank + + @property + def mode(self): + return self.model.mode + + @mode.setter + def mode(self, value): + self.model.mode = value + + def train_batch(self, inputs, labels=None): + assert self.model._optimizer, \ + "model not ready, please call `model.prepare()` first" + self.mode = 'train' + return self._run(inputs, labels) + + def eval_batch(self, inputs, labels=None): + self.mode = 'eval' + return self._run(inputs, labels) + + def test_batch(self, inputs): + self.mode = 'test' + return self._run(inputs, None) + + def parameters(self, *args, **kwargs): + return super(Model, self.model).parameters(*args, **kwargs) + + def save(self, path): + def _save(state, path): + if not state: + return + state = { + k: to_numpy(v) if isinstance(v, Variable) else v + for k, v in state.items() + } + with open(path, 'wb') as f: + pickle.dump(state, f) + + base = os.path.basename(path) + assert base != "", "path should be of 'dirname/filename' format" + dir_name = os.path.dirname(path) + if dir_name and not os.path.exists(dir_name): + os.makedirs(dir_name) + param_path = path + ".pdparams" + _save(self.model.state_dict(), param_path) + prog = self._progs.get('train', None) + if prog is None or self.model._optimizer is None: + return + # XXX 
`optimizer.state_dict()` only work in dygraph mode + optim_path = path + ".pdopt" + optim = { + p.name: p + for p in filter(is_belong_to_optimizer, prog.list_vars()) + } + if not optim: + return + + _save(optim, optim_path) + + def load(self, param_state_pairs, optim_state): + if self._executor is None: + executor = fluid.Executor(fluid.CPUPlace())._default_executor + else: + executor = self._executor._default_executor + + # restore parameter states + fluid.core._create_loaded_parameter( + [param for param, state in param_state_pairs], + global_scope(), executor) + for param, state in param_state_pairs: + self._set_var(param, state) + + # restore optimizer states + # FIXME what if a different optimizer is used? + if not self.model._optimizer or not optim_state: + return + self._load_optimizer(optim_state, executor) + + def _load_optimizer(self, state, executor): + prog = self._progs.get('train', None) + optim = list(filter(is_belong_to_optimizer, prog.list_vars())) + if not optim: + return + + fluid.core._create_loaded_parameter(optim, global_scope(), executor) + + converted_state = dict(state) + for var in optim: + if var.name in ["@LR_DECAY_COUNTER@", "global_step"]: + # When using learning rate scheduler, dygraph would name the + # global step var as "global_step" to save, while static-graph + # would has a state var named as "@LR_DECAY_COUNTER@". + # NOTE: dygraph saved global_step is 1 larger than that in + # static-graph, since the time of global_step to increase is + # different. + state_val = ( + np.array(converted_state.pop("global_step")) - 1 + ) if "global_step" in converted_state else converted_state.pop( + "@LR_DECAY_COUNTER@", None) + if state_val is not None: + converted_state[var.name] = state_val + elif var.name.startswith("learning_rate_"): + # When using static learning rate, static-graph would make it + # a persistable var named 'unique_name.generate("learning_rate")', + # However, dygraph wouldn't save it. 
+ if var.name not in state: + continue + else: + # moment and other accumulators + if var.name not in converted_state: + # try to convert from dygraph name + opt_name = self.model._optimizer._name + opt_cls_name = self.model._optimizer.__class__.__name__ + opt_unq_name = None + for name in self.model._optimizer._accumulators.keys(): + accum_name = name if opt_name is None else name[len( + opt_name) + 1:] + for param_name, state_var in self.model._optimizer._accumulators[ + name].items(): + if opt_unq_name is None: + # can not infer out the exact unique(opt_name), + # thus try to extract rather than generate + for state_key in sorted( + state.keys(), + key=lambda x: len(x), + reverse=True): + prefix = param_name + "_" + ( + opt_cls_name + if opt_name is None else opt_name) + "_" + if state_key.startswith(prefix): + prefix_offset = state_key[len( + prefix):].find("_") + len(prefix) + opt_unq_name = state_key[len( + param_name + "_"):prefix_offset] + # TODO: assert + # assert opt_unq_name is None + # gen(param.name + "_" + gen(opt_name) + "_" + accum_name) + # always end with "_0" since the unique optimizer._name + dy_state_name = (param_name + "_" + opt_unq_name + + "_" + accum_name + "_0") + converted_state[ + state_var.name] = converted_state.pop( + dy_state_name) + + assert var.name in converted_state, \ + "variable [{}] is not in optimizer state file".format(var.name) + self._set_var(var, converted_state[var.name]) + + def _set_var(self, var, ndarray): + t = global_scope().find_var(var.name).get_tensor() + p = t._place() + if p.is_cpu_place(): + place = fluid.CPUPlace() + elif p.is_cuda_pinned_place(): + place = fluid.CUDAPinnedPlace() + else: + p = fluid.core.Place() + p.set_place(t._place()) + place = fluid.CUDAPlace(p.gpu_device_id()) + + t.set(ndarray, place) + + def _run(self, inputs, labels=None): + compiled_prog = self._compiled_progs.get(self.mode, None) + assert compiled_prog, \ + "Model is not ready, please call `model.prepare()` first" + + inputs = 
to_list(inputs) + if labels is not None: + labels = to_list(labels) + assert len(inputs) == len(self._input_vars[self.mode]), \ + "number of inputs" \ + + " does not match number of arguments of `forward` method" + + feed = {} + input_names = [v.name for v in self._input_vars[self.mode]] + for idx, n in enumerate(input_names): + # train and test may take different arguments + if inputs[idx] is not None: + feed[n] = inputs[idx] + if labels is not None: + for idx, v in enumerate(self._label_vars[self.mode]): + feed[v.name] = labels[idx] + + endpoints = self._endpoints[self.mode] + if self.mode == 'test': + fetch_list = endpoints['output'] + else: + metric_list, metric_splits = flatten_list(endpoints['metric']) + fetch_list = endpoints['loss'] + metric_list + num_loss = len(endpoints['loss']) + + # if fetch Variable is same as input Variable, do not fetch + # from program, get it from input directly + pruned_fetch_list = [] + pruned_fetch_idx_name_map = [""] * len(fetch_list) + for i, fetch_var in enumerate(fetch_list): + if fetch_var.name in feed.keys(): + pruned_fetch_idx_name_map[i] = fetch_var.name + else: + pruned_fetch_list.append(fetch_var) + + rets = self._executor.run(compiled_prog, + feed=feed, + fetch_list=pruned_fetch_list, + return_numpy=False) + + # restore pruned fetch_list Variable from feeds + for i, name in enumerate(pruned_fetch_idx_name_map): + if len(name) > 0: + rets.insert(i, feed[name]) + + # LoDTensor cannot be fetch as numpy directly + rets = [np.array(v) for v in rets] + if self.mode == 'test': + return rets[:] + losses = rets[:num_loss] + metric_states = restore_flatten_list(rets[num_loss:], metric_splits) + metrics = [] + for metric, state in zip(self.model._metrics, metric_states): + # cut off padding size + if self.mode != 'train' and self.model._test_dataloader is not None \ + and isinstance(self.model._test_dataloader, DataLoader) \ + and self._nranks > 1: + total_size = len(self.model._test_dataloader.dataset) + # TODO: fixme if have 
better way to get batch size + samples = state[0].shape[0] + current_count = self._merge_count.get(self.mode + '_total', 0) + if current_count + samples >= total_size: + state = [ + s[:int(total_size - current_count), ...] for s in state + ] + self._merge_count[self.mode + '_total'] = 0 + self._merge_count[self.mode + '_batch'] = int(total_size - + current_count) + else: + self._merge_count[self.mode + '_total'] += samples + self._merge_count[self.mode + '_batch'] = samples + + metrics.append(metric.update(*state)) + return (losses, metrics) if len(metrics) > 0 else losses + + def prepare(self): + modes = ['train', 'eval', 'test'] + for mode in modes: + self._make_program(mode) + self._compile_and_initialize(self._progs[mode], mode) + + def _make_program(self, mode): + prog = self._progs.get(mode, None) + if prog is not None: + return + + prog = self._orig_prog.clone() + # NOTE: When defining learning rate scheduling in static-graph, ops to + # increase the global step var and calculate learning rate would be + # prepended into _orig_prog. test program maked by `_orig_prog.clone` + # also would include these ops. Thus must prune these ops in test + # program, otherwise the global step would be changed in test. 
+ if mode != 'train': + for op in list(prog.global_block().ops): + prog.global_block()._remove_op(0) + if mode == 'train' and self.model._optimizer \ + and self.model._optimizer._learning_rate_map: + # HACK workaround learning rate map issue + lr_var = self.model._optimizer._learning_rate_map[self._orig_prog] + new_lr_var = prog.global_block().vars[lr_var.name] + self.model._optimizer._learning_rate_map[prog] = new_lr_var + + losses = [] + metrics = [] + with fluid.program_guard(prog, self._startup_prog): + ins = self.model._inputs + lbls = self.model._labels if self.model._labels else [] + inputs = [k.forward() for k in to_list(ins)] + labels = [k.forward() for k in to_list(lbls)] + self._label_vars[mode] = labels + outputs = to_list(self.model.forward(*inputs)) + + if mode != 'test' and self.model._loss_function: + losses = self.model._loss_function(outputs, labels) + + if self._nranks > 1 and mode != 'train': + outputs = [_all_gather(o, self._nranks) for o in outputs] + if mode != 'test': + labels = [_all_gather(l, self._nranks) for l in labels] + + if mode != 'test': + for metric in self.model._metrics: + metrics.append( + to_list(metric.add_metric_op(*(outputs + labels)))) + + if mode == 'train' and self.model._optimizer: + self._loss_endpoint = fluid.layers.sum(losses) + if self._nranks > 1: + role = role_maker.PaddleCloudRoleMaker(is_collective=True) + fleet.init(role) + dist_strategy = DistributedStrategy() + dist_strategy.mode = "collective" + dist_strategy.collective_mode = "grad_allreduce" + self.model._optimizer = fleet.distributed_optimizer( + self.model._optimizer, strategy=dist_strategy) + + self.model._optimizer.minimize(self._loss_endpoint) + + if mode != 'train': # clone again to put it in test mode + prog = prog.clone(for_test=True) + + self._input_vars[mode] = inputs + + self._progs[mode] = prog + self._endpoints[mode] = { + "output": outputs, + "loss": losses, + "metric": metrics + } + + def _compile_and_initialize(self, prog, mode): + 
compiled_prog = self._compiled_progs.get(mode, None) + if compiled_prog is not None: + return compiled_prog + + assert self.model._place is not None, \ + "device is not set, please call `model.prepare()` first" + + place = self.model._place + + # XXX *ALL WEIGHTS* should be initialized upon model construction + # even if `forward()` may run different code path for different mode + # therefore startup program only needs to run once + if self._executor is None: + self._executor = fluid.Executor(place) + # XXX incremental initialization + uninitialized = [] + for var_py in self._startup_prog.list_vars(): + var = fluid.global_scope().find_var(var_py.name) + if not var_py.name.startswith('nccl_id') and var and \ + var.get_tensor()._is_initialized(): + continue + + uninitialized.append(var_py) + if uninitialized: + startup_prog = self._startup_prog._prune(uninitialized) + self._executor.run(startup_prog) + + if self._nranks < 2: + compiled_prog = fluid.CompiledProgram(prog) + else: + compiled_prog = prog + + self._compiled_progs[mode] = compiled_prog + + +class DynamicGraphAdapter(object): + def __init__(self, model): + super(DynamicGraphAdapter, self).__init__() + self.model = model + self._nranks = ParallelEnv().nranks + self._local_rank = ParallelEnv().local_rank + self._merge_count = { + 'eval_total': 0, + 'test_total': 0, + 'eval_batch': 0, + 'test_batch': 0 + } + + if self._nranks > 1: + stradegy = fluid.dygraph.parallel.ParallelStrategy() + stradegy.nranks = ParallelEnv().nranks + stradegy.local_rank = ParallelEnv().local_rank + stradegy.trainer_endpoints = ParallelEnv().trainer_endpoints + stradegy.current_endpoint = ParallelEnv().current_endpoint + self.ddp_model = fluid.dygraph.parallel.DataParallel(self.model, + stradegy) + + @property + def mode(self): + return self.model.mode + + @mode.setter + def mode(self, value): + self.model.mode = value + + # TODO multi device in dygraph mode not implemented at present time + def train_batch(self, inputs, labels=None): 
+ assert self.model._optimizer, \ + "model not ready, please call `model.prepare()` first" + super(Model, self.model).train() + self.mode = 'train' + inputs = to_list(inputs) + if labels is not None: + labels = [to_variable(l) for l in to_list(labels)] + if self._nranks > 1: + outputs = self.ddp_model.forward(* [to_variable(x) for x in inputs]) + losses = self.model._loss_function(outputs, labels) + final_loss = fluid.layers.sum(losses) + final_loss = self.ddp_model.scale_loss(final_loss) + final_loss.backward() + self.ddp_model.apply_collective_grads() + else: + outputs = self.model.forward(* [to_variable(x) for x in inputs]) + losses = self.model._loss_function(outputs, labels) + final_loss = fluid.layers.sum(losses) + final_loss.backward() + + self.model._optimizer.minimize(final_loss) + self.model.clear_gradients() + metrics = [] + for metric in self.model._metrics: + metric_outs = metric.add_metric_op(*(to_list(outputs) + to_list( + labels))) + m = metric.update(* [to_numpy(m) for m in to_list(metric_outs)]) + metrics.append(m) + + return ([to_numpy(l) for l in losses], metrics) \ + if len(metrics) > 0 else [to_numpy(l) for l in losses] + + def eval_batch(self, inputs, labels=None): + super(Model, self.model).eval() + self.mode = 'eval' + inputs = to_list(inputs) + if labels is not None: + labels = [to_variable(l) for l in to_list(labels)] + outputs = self.model.forward(* [to_variable(x) for x in inputs]) + if self.model._loss_function: + losses = self.model._loss_function(outputs, labels) + else: + losses = [] + if self._nranks > 1: + outputs = [_all_gather(o, self._nranks) for o in to_list(outputs)] + labels = [_all_gather(l, self._nranks) for l in labels] + metrics = [] + for metric in self.model._metrics: + # cut off padding value. 
+ if self.model._test_dataloader is not None and self._nranks > 1 \ + and isinstance(self.model._test_dataloader, DataLoader): + total_size = len(self.model._test_dataloader.dataset) + samples = outputs[0].shape[0] + current_count = self._merge_count.get(self.mode + '_total', 0) + if current_count + samples >= total_size: + outputs = [ + o[:int(total_size - current_count)] for o in outputs + ] + labels = [ + l[:int(total_size - current_count)] for l in labels + ] + self._merge_count[self.mode + '_total'] = 0 + self._merge_count[self.mode + '_batch'] = int(total_size - + current_count) + else: + self._merge_count[self.mode + '_total'] += samples + self._merge_count[self.mode + '_batch'] = samples + + metric_outs = metric.add_metric_op(*(to_list(outputs) + to_list( + labels))) + m = metric.update(* [to_numpy(m) for m in to_list(metric_outs)]) + metrics.append(m) + + # To be consistent with static graph + # return empty loss if loss_function is None + return ([to_numpy(l) for l in losses], metrics) \ + if len(metrics) > 0 else [to_numpy(l) for l in losses] + + def test_batch(self, inputs): + super(Model, self.model).eval() + self.mode = 'test' + inputs = [to_variable(x) for x in to_list(inputs)] + outputs = self.model.forward(*inputs) + if self._nranks > 1 and isinstance(self.model._place, fluid.CUDAPlace): + outputs = [_all_gather(o, self._nranks) for o in to_list(outputs)] + + return [to_numpy(o) for o in to_list(outputs)] + + def parameters(self, *args, **kwargs): + return super(Model, self.model).parameters(*args, **kwargs) + + def save(self, path): + params = self.model.state_dict() + fluid.save_dygraph(params, path) + if self.model._optimizer is None: + return + if self.model._optimizer.state_dict(): + optim = self.model._optimizer.state_dict() + fluid.save_dygraph(optim, path) + + def load(self, param_state_pairs, optim_state): + # restore parameter states + for param, state in param_state_pairs: + param.set_value(state) + + # resotre optimizer states + if not 
self.model._optimizer or not optim_state: + return + + # If optimizer performs set_dict when state vars haven't been created, + # which would happen when set_dict before minimize, the state would be + # stored in optimizer._accumulators_holder and loaded lazily. + # To contrive this when loading from static-graph saved states, extend + # state dict to include keys named accoring to dygraph naming rules. + # TODO: if len(self.model._optimizer._accumulators) > 0 + converted_state = dict(optim_state) + opt_unq_name = self.model._optimizer._name + if opt_unq_name is None: + opt_unq_name = '' + + opt_cls_name = self.model._optimizer.__class__.__name__ + opt_name = opt_unq_name[:opt_unq_name.rfind("_")] # remove suffix idx + param_names = [param.name for param in self.model.parameters()] + for var_name, state_var in sorted( + optim_state.items(), key=lambda x: len(x[0]), reverse=True): + if var_name in ["@LR_DECAY_COUNTER@", "global_step"]: + # NOTE: dygraph saved global_step is 1 larger than that in + # static-graph, since the time of global_step to increase is + # different. 
+ if var_name == "@LR_DECAY_COUNTER@": + converted_state["global_step"] = np.array( + converted_state.pop("@LR_DECAY_COUNTER@")) + 1 + else: + # moment and other accumulators + # extend state dict to include promising dygraph names + for param_name in param_names: + if var_name.startswith(param_name + "_" + opt_name): + # when init optimizer with name + accum_name = var_name[len(param_name + "_" + opt_name + + "_"):] + elif var_name.startswith(param_name + + "_") and opt_name == opt_cls_name: + # when init optimizer without name + accum_name = var_name[len(param_name + "_"):] + else: + continue + # remove suffix idx + accum_name = accum_name[:accum_name.rfind("_")] + # state names always end with "_0" in dygraph because of the + # unique optimizer._name + dy_state_name = (param_name + "_" + opt_unq_name + "_" + + accum_name + "_0") + converted_state[dy_state_name] = state_var + + self.model._optimizer.set_dict(converted_state) + + +class Model(fluid.dygraph.Layer): + """ + An Model object is network with training and inference features. + Dynamic graph and static graph are supported at the same time, + switched by `fluid.enable_dygraph()`. The usage is as follows. + But note, the switching between dynamic and static should be before + instantiating a Model. The input description, i.e, hapi.Input, + must be required for static graph. + + Usage: + .. 
code-block:: python + + import numpy as np + import paddle + import paddle.fluid as fluid + #import paddle.incubate.hapi as hapi + from paddle.incubate.hapi import Model, Input, set_device + from paddle.incubate.hapi.loss import CrossEntropy + from paddle.incubate.hapi.dataset import MNIST + + class MyModel(Model): + def __init__(self): + super(MyModel, self).__init__() + self._fc = fluid.dygraph.Linear(784, 10, act='softmax') + def forward(self, x): + y = self._fc(x) + return y + device = set_device('gpu') + # if use static graph, do not set + fluid.enable_dygraph(device) + model = MyModel() + optim = fluid.optimizer.SGD(learning_rate=1e-3, + parameter_list=model.parameters()) + + inputs = [Input([None, 784], 'float32', name='x')] + labels = [Input([None, 1], 'int64', name='label')] + + mnist_data = MNIST(mode='train') + model.prepare(optim, + CrossEntropy(average=True), + hapi.metrics.Accuracy(), + inputs, + labels, + device=device) + model.fit(mnist_data, epochs=2, batch_size=32, verbose=1) + """ + + def __init__(self): + super(Model, self).__init__(self.__class__.__name__) + self.mode = 'train' + self._inputs = None + self._labels = None + self._loss_function = None + self._loss_weights = None + self._optimizer = None + self._device = None + self._optimizer = None + self._test_dataloader = None + + # init backend + if fluid.in_dygraph_mode(): + self._adapter = DynamicGraphAdapter(self) + else: + self._adapter = StaticGraphAdapter(self) + + def train_batch(self, inputs, labels=None): + """ + Run one training step on a batch of data. + + Args: + inputs (list): A list of numpy.ndarray, each is a batch of + input data. + labels (list): A list of numpy.ndarray, each is a batch of + input label. If has no labels, set None. Default is None. + + Returns: + A list of scalar training loss if the model has no metrics, + or a tuple (list of scalar loss, list of metrics) if the model + set metrics. + + Examples: + + .. 
code-block:: python + + import numpy as np + import paddle.fluid as fluid + from paddle.incubate.hapi import Model, Input, set_device + + class MyModel(Model): + def __init__(self): + super(MyModel, self).__init__() + self._fc = Linear(784, 1, act='softmax') + def forward(self, x): + y = self._fc(x) + return y + + device = hapi.set_device('gpu') + fluid.enable_dygraph(device) + + model = MyModel() + optim = fluid.optimizer.SGD(learning_rate=1e-3, + parameter_list=model.parameters()) + + inputs = [Input([None, 784], 'float32', name='x')] + labels = [Input([None, 1], 'int64', name='label')] + model.prepare(optim, + CrossEntropy(average=True), + inputs=inputs, + labels=labels, + device=device) + data = np.random.random(size=(4,784)).astype(np.float32) + label = np.random.randint(0, 10, size=(4, 1)).astype(np.int64) + loss = model.train_batch([data], [label]) + print(loss) + """ + return self._adapter.train_batch(inputs, labels) + + def eval_batch(self, inputs, labels=None): + """ + Run one evaluating step on a batch of data. + + Args: + inputs (list): A list of numpy.ndarray, each is a batch of + input data. + labels (list): A list of numpy.ndarray, each is a batch of + input label. If has no labels, set None. Default is None. + + Returns: + A list of scalar testing loss if the model has no metrics, + or a tuple (list of scalar loss, list of metrics) if the model + set metrics. + + Examples: + + .. 
    def test_batch(self, inputs):
        """
        Run one testing step on a batch of data.

        The call is forwarded to the backend adapter's `test_batch`.

        Args:
            inputs (list): A list of numpy.ndarray, each is a batch of
                input data.

        Returns:
            A list of numpy.ndarray of predictions, that is the outputs
            of Model forward.

        Examples:

            .. code-block:: python

                import numpy as np
                import paddle.fluid as fluid
                from paddle.incubate.hapi import Model, Input, set_device

                class MyModel(Model):
                    def __init__(self):
                        super(MyModel, self).__init__()
                        self._fc = fluid.dygraph.Linear(784, 1, act='softmax')
                    def forward(self, x):
                        y = self._fc(x)
                        return y

                device = set_device('gpu')
                fluid.enable_dygraph(device)

                model = MyModel()
                inputs = [Input([None, 784], 'float32', name='x')]
                model.prepare(inputs=inputs,
                              device=device)
                data = np.random.random(size=(4,784)).astype(np.float32)
                out = model.test_batch([data])
                print(out)
        """
        return self._adapter.test_batch(inputs)
def load(self, path, skip_mismatch=False, reset_optimizer=False):
    """
    Load from files storing the model states and optimizer states. The file
    for optimizer states is not necessary if no need to restore the optimizer.

    NOTE: parameters are retrieved out from the file storing model states
    according to their structured names.

    For fine-tuning or transfer-learning models where some of the layers have
    changed, keep parameters needed to restore have same structured names in
    the pre-trained model and fine-tuning model.

    Args:
        path (str): The prefix of files storing the model states and
            optimizer states. The files would be `path.pdparams` and
            `path.pdopt` separately, and the latter is not necessary
            when no need to restore. A trailing '.pdparams', '.pdopt' or
            '.pdmodel' extension is stripped before use.
        skip_mismatch (bool): Whether to skip the loading of a mismatched
            parameter or raise an error when a mismatch happens (the
            parameter is not found in the file storing model states, or
            it is found with a different shape). A skipped parameter is
            reported with a warning and also forces the optimizer states
            to be re-initialized. Default False.
        reset_optimizer (bool): If True, ignore the providing file storing
            optimizer states and initialize optimizer states from scratch.
            Otherwise, restore optimizer states from `path.pdopt` if
            a optimizer has been set to the model. Default False.

    Returns:
        The value returned by the adapter's ``load`` (documented as None).

    Raises:
        AssertionError: If `path` carries an unknown extension, or the
            `.pdparams` file is missing or holds no parameters.
        ValueError: If a parameter is missing or has a mismatched shape
            and `skip_mismatch` is False.

    Examples:

        .. code-block:: python

            import paddle.fluid as fluid
            from paddle.incubate.hapi import Model, set_device

            class MyModel(Model):
                def __init__(self):
                    super(MyModel, self).__init__()
                    self._fc = fluid.dygraph.Linear(784, 1, act='softmax')
                def forward(self, x):
                    y = self._fc(x)
                    return y

            device = set_device('cpu')
            fluid.enable_dygraph(device)
            model = MyModel()
            model.load('checkpoint/test')
    """

    def _load_state_from_path(path):
        # A missing file is not an error here: callers decide what an
        # absent state (None) means.
        if not os.path.exists(path):
            return
        # NOTE(review): unpickling executes arbitrary code from the file;
        # only load checkpoints that come from a trusted source.
        with open(path, 'rb') as f:
            return pickle.load(f) if six.PY2 else pickle.load(
                f, encoding='latin1')

    def _check_match(key, param):
        state = param_state.get(key, None)
        if state is None:
            raise ValueError(
                "{} is not found in the providing file.".format(key))
        if list(state.shape) != list(param.shape):
            raise ValueError(
                "{} receives a shape {}, but the expected shape is {}.".
                format(key, list(state.shape), list(param.shape)))
        return param, state

    def _strip_postfix(path):
        path, ext = os.path.splitext(path)
        assert ext in ['', '.pdparams', '.pdopt', '.pdmodel'], \
            "Unknown postfix {} from weights".format(ext)
        return path

    path = _strip_postfix(path)
    param_state = _load_state_from_path(path + ".pdparams")
    assert param_state, "Failed to load parameters, please check path."

    matched_param_state = []
    for key, param in self.state_dict().items():
        try:
            match_res = _check_match(key, param)
        except ValueError as err:
            if not skip_mismatch:
                raise
            warnings.warn(
                ("Skip loading for {}. ".format(key) + str(err)))
            # reset optimizer when mismatch happens
            reset_optimizer = True
            # Nothing matched for this key: appending here would either
            # hit an unbound `match_res` or re-add the previous key's pair.
            continue
        matched_param_state.append(match_res)

    optim_state = None if reset_optimizer else _load_state_from_path(
        path + ".pdopt")
    return self._adapter.load(matched_param_state, optim_state)
+ labels (Input|list|None): `labels`, entry points of network, + could be a Input layer or lits of Input layers, or None. + For static graph, if labels is required in loss_function, + labels must be set. Otherwise, it could be None. + device (str|fluid.CUDAPlace|fluid.CPUPlace|None): Specify device + type, 'CPU', 'GPU', fluid.CUDAPlace or fluid.CPUPlace. + If None, automatically select device according to + installation package version. + + Returns: + None + """ + + if isinstance(device, fluid.CUDAPlace) or \ + (isinstance(device, six.string_types) and device.lower() == 'gpu') \ + or (device is None and fluid.is_compiled_with_cuda()): + if isinstance(device, fluid.CUDAPlace): + self._place = device + else: + self._place = fluid.CUDAPlace(ParallelEnv().dev_id) \ + if ParallelEnv().nranks > 1 else fluid.CUDAPlace(0) + + global _parallel_context_initialized + if ParallelEnv().nranks > 1 and not _parallel_context_initialized: + if fluid.in_dygraph_mode(): + main_prog_seed = fluid.default_main_program().random_seed + startup_prog_seed = fluid.default_startup_program( + ).random_seed + fluid.disable_dygraph() + fluid.enable_dygraph(self._place) + # enable_dygraph would create and switch to a new program, + # thus also copy seed to the new program + fluid.default_main_program().random_seed = main_prog_seed + fluid.default_startup_program( + ).random_seed = startup_prog_seed + fluid.dygraph.parallel.prepare_context() + else: + prepare_distributed_context(self._place) + + _parallel_context_initialized = True + elif isinstance(device, fluid.CPUPlace): + self._place = device + elif (isinstance(device, six.string_types) and device.lower() == 'cpu') \ + or (device is None): + self._place = fluid.CPUPlace() + else: + raise ValueError( + "Expected device in ('gpu', 'cpu', fluid.CUDAPlace, fluid.CPUPlace, None), \ + but got {}".format(device)) + + self._optimizer = optimizer + if loss_function: + if not isinstance(loss_function, Loss): + raise TypeError("'loss_function' must be 
sub classes of 'Loss'") + self._loss_function = loss_function + if not in_dygraph_mode(): + if not isinstance(inputs, (list, dict, Input)): + raise TypeError( + "'inputs' must be list or dict in static graph mode") + + metrics = metrics or [] + for metric in to_list(metrics): + assert isinstance(metric, Metric), \ + "{} is not sub class of Metric".format( + metric.__class__.__name__) + self._metrics = to_list(metrics) + + self._inputs = to_list(inputs) if not isinstance(inputs, dict) else [ + inputs[n] for n in extract_args(self.forward) if n != 'self' + ] + self._labels = to_list(labels) + + if not in_dygraph_mode(): + self._adapter.prepare() + + def fit( + self, + train_data=None, + eval_data=None, + batch_size=1, + epochs=1, + eval_freq=1, + log_freq=10, + save_dir=None, + save_freq=1, + verbose=2, + drop_last=False, + shuffle=True, + num_workers=0, + callbacks=None, ): + """ + Trains the model for a fixed number of epochs. If `eval_data` is set, + evaluation will be done at the end of each epoch. + + Args: + train_data (Dataset|DataLoader): An iterable data loader is used for + train. An instance of paddle paddle.io.Dataset or + paddle.io.Dataloader is recomended. Default: None. + eval_data (Dataset|DataLoader): An iterable data loader is used for + evaluation at the end of epoch. If None, will not do evaluation. + An instance of paddle.io.Dataset or paddle.io.Dataloader + is recomended. Default: None. + batch_size (int): Integer number. The batch size of train_data + and eval_data. When train_data and eval_data are both the + instance of Dataloader, this parameter will be ignored. + Default: 1. + epochs (int): Integer number. The number of epochs to train + the model. Default: 1. + eval_freq (int): The frequency, in number of epochs, an evalutation + is performed. Default: 1. + log_freq (int): The frequency, in number of steps, the training logs + are printed. Default: 10. + save_dir(str|None): The directory to save checkpoint during training. 
+ If None, will not save checkpoint. Default: None. + save_freq (int): The frequency, in number of epochs, to save + checkpoint. Default: 1. + verbose (int): The verbosity mode, should be 0, 1, or 2. 0 = silent, + 1 = progress bar, 2 = one line per epoch. Default: 2. + drop_last (bool): Whether drop the last incomplete batch of + train_data when dataset size is not divisible by the batch size. + When train_data is an instance of Dataloader, this parameter + will be ignored. Default: False. + shuffle (bool): Whther to shuffle train_data. When train_data is + an instance of Dataloader, this parameter will be ignored. + Default: True. + num_workers (int): The number of subprocess to load data, 0 for no + subprocess used and loading data in main process. + When train_data and eval_data are both the instance of + Dataloader, this parameter will be ignored. Default: 0. + callbacks (Callback|None): A list of `Callback` instances to apply + during training. If None, `ProgBarLogger` and `ModelCheckpoint` + are automatically inserted. Default: None. + + Returns: + None + + Examples: + 1. An example use Dataset and set btch size, shuffle in fit. + How to make a batch is done internally. + + .. 
code-block:: python + + from paddle.incubate.hapi.model import Model, Input, set_device + from paddle.incubate.hapi.loss import CrossEntropy + from paddle.incubate.hapi.metrics import Accuracy + from paddle.incubate.hapi.datasets import MNIST + from paddle.incubate.hapi.vision.models import LeNet + + dynamic = True + device = set_device(FLAGS.device) + fluid.enable_dygraph(device) if dynamic else None + + train_dataset = MNIST(mode='train') + val_dataset = MNIST(mode='test') + + inputs = [Input([None, 1, 28, 28], 'float32', name='image')] + labels = [Input([None, 1], 'int64', name='label')] + + model = LeNet() + optim = fluid.optimizer.Adam( + learning_rate=0.001, parameter_list=model.parameters()) + model.prepare( + optim, + CrossEntropy(), + Accuracy(topk=(1, 2)), + inputs=inputs, + labels=labels, + device=device) + model.fit(train_dataset, + val_dataset, + epochs=2, + batch_size=64, + save_dir='mnist_checkpoint') + + 2. An example use DataLoader, batch size and shuffle is set in + DataLoader. + + .. 
code-block:: python + + from paddle.incubate.hapi.model import Model, Input, set_device + from paddle.incubate.hapi.loss import CrossEntropy + from paddle.incubate.hapi.metrics import Accuracy + from paddle.incubate.hapi.datasets import MNIST + from paddle.incubate.hapi.vision.models import LeNet + + dynamic = True + device = set_device(FLAGS.device) + fluid.enable_dygraph(device) if dynamic else None + + train_dataset = MNIST(mode='train') + train_loader = fluid.io.DataLoader(train_dataset, + places=device, batch_size=64) + val_dataset = MNIST(mode='test') + val_loader = fluid.io.DataLoader(val_dataset, + places=device, batch_size=64) + + inputs = [Input([None, 1, 28, 28], 'float32', name='image')] + labels = [Input([None, 1], 'int64', name='label')] + + model = LeNet() + optim = fluid.optimizer.Adam( + learning_rate=0.001, parameter_list=model.parameters()) + model.prepare( + optim, + CrossEntropy(), + Accuracy(topk=(1, 2)), + inputs=inputs, + labels=labels, + device=device) + model.fit(train_loader, + val_loader, + epochs=2, + save_dir='mnist_checkpoint') + """ + + assert train_data is not None, \ + "train_data must be given!" 
+ + if isinstance(train_data, Dataset): + train_sampler = DistributedBatchSampler( + train_data, + batch_size=batch_size, + shuffle=shuffle, + drop_last=drop_last) + train_loader = DataLoader( + train_data, + batch_sampler=train_sampler, + places=self._place, + num_workers=num_workers, + return_list=True) + else: + train_loader = train_data + + if eval_data is not None and isinstance(eval_data, Dataset): + eval_sampler = DistributedBatchSampler( + eval_data, batch_size=batch_size) + eval_loader = DataLoader( + eval_data, + batch_sampler=eval_sampler, + places=self._place, + num_workers=num_workers, + return_list=True) + elif eval_data is not None: + eval_loader = eval_data + else: + eval_loader = None + + do_eval = eval_loader is not None + self._test_dataloader = eval_loader + + steps = self._len_data_loader(train_loader) + cbks = config_callbacks( + callbacks, + model=self, + epochs=epochs, + steps=steps, + log_freq=log_freq, + save_freq=save_freq, + save_dir=save_dir, + verbose=verbose, + metrics=self._metrics_name(), ) + + cbks.on_begin('train') + for epoch in range(epochs): + + cbks.on_epoch_begin(epoch) + logs = self._run_one_epoch(train_loader, cbks, 'train') + cbks.on_epoch_end(epoch, logs) + + if do_eval and epoch % eval_freq == 0: + + eval_steps = self._len_data_loader(eval_loader) + cbks.on_begin('eval', { + 'steps': eval_steps, + 'metrics': self._metrics_name() + }) + + eval_logs = self._run_one_epoch(eval_loader, cbks, 'eval') + + cbks.on_end('eval', eval_logs) + + cbks.on_end('train', logs) + self._test_dataloader = None + + def evaluate( + self, + eval_data, + batch_size=1, + log_freq=10, + verbose=2, + num_workers=0, + callbacks=None, ): + """ + Evaluate the loss and metrics of the model on input dataset. + + Args: + eval_data (Dataset|DataLoader): An iterable data loader is used for + evaluation. An instance of paddle.io.Dataset or + paddle.io.Dataloader is recomended. + batch_size (int): Integer number. 
The batch size of train_data + and eval_data. When eval_data is the instance of Dataloader, + this argument will be ignored. Default: 1. + log_freq (int): The frequency, in number of steps, the eval logs + are printed. Default: 10. + verbose (int): The verbosity mode, should be 0, 1, or 2. 0 = silent, + 1 = progress bar, 2 = one line per epoch. Default: 2. + num_workers (int): The number of subprocess to load data, + 0 for no subprocess used and loading data in main process. When + train_data and eval_data are both the instance of Dataloader, + this parameter will be ignored. Default: 0. + callbacks (Callback|None): A list of `Callback` instances to apply + during training. If None, `ProgBarLogger` and `ModelCheckpoint` + are automatically inserted. Default: None. + Returns: + dict: Result of metric. The key is the names of Metric, + value is a scalar or numpy.array. + + Examples: + .. code-block:: python + + # declarative mode + import numpy as np + from paddle.incubate.hapi.metrics import Accuracy + from paddle.incubate.hapi.datasets import MNIST + from paddle.incubate.hapi.vision.transforms import Compose,Resize + from paddle.incubate.hapi.vision.models import LeNet + from paddle.incubate.hapi.model import Input, set_device + + + inputs = [Input([-1, 1, 28, 28], 'float32', name='image')] + labels = [Input([None, 1], 'int64', name='label')] + + val_dataset = MNIST(mode='test') + + model = LeNet() + model.prepare(metrics=Accuracy(), inputs=inputs, labels=labels) + + result = model.evaluate(val_dataset, batch_size=64) + print(result) + + # imperative mode + import paddle.fluid.dygraph as dg + place = set_device('cpu') + with dg.guard(place) as g: + model = LeNet() + model.prepare(metrics=Accuracy(), inputs=inputs, labels=labels) + + result = model.evaluate(val_dataset, batch_size=64) + print(result) + + """ + + if eval_data is not None and isinstance(eval_data, Dataset): + eval_sampler = DistributedBatchSampler( + eval_data, batch_size=batch_size) + eval_loader = 
DataLoader( + eval_data, + batch_sampler=eval_sampler, + places=self._place, + num_workers=num_workers, + return_list=True) + else: + eval_loader = eval_data + + self._test_dataloader = eval_loader + + cbks = config_callbacks( + callbacks, + model=self, + log_freq=log_freq, + verbose=verbose, + metrics=self._metrics_name(), ) + + eval_steps = self._len_data_loader(eval_loader) + cbks.on_begin('eval', + {'steps': eval_steps, + 'metrics': self._metrics_name()}) + + logs = self._run_one_epoch(eval_loader, cbks, 'eval') + + cbks.on_end('eval', logs) + + self._test_dataloader = None + + eval_result = {} + for k in self._metrics_name(): + eval_result[k] = logs[k] + + return eval_result + + def predict(self, + test_data, + batch_size=1, + num_workers=0, + stack_outputs=False, + callbacks=None): + """ + Compute the output predictions on testing data. + + Args: + test_data (Dataset|DataLoader): An iterable data loader is used for + predict. An instance of paddle.io.Dataset or paddle.io.Dataloader + is recomended. + batch_size (int): Integer number. The batch size of train_data and eval_data. + When train_data and eval_data are both the instance of Dataloader, this + argument will be ignored. Default: 1. + num_workers (int): The number of subprocess to load data, 0 for no subprocess + used and loading data in main process. When train_data and eval_data are + both the instance of Dataloader, this argument will be ignored. Default: 0. + stack_output (bool): Whether stack output field like a batch, as for an output + filed of a sample is in shape [X, Y], test_data contains N samples, predict + output field will be in shape [N, X, Y] if stack_output is True, and will + be a length N list in shape [[X, Y], [X, Y], ....[X, Y]] if stack_outputs + is False. stack_outputs as False is used for LoDTensor output situation, + it is recommended set as True if outputs contains no LoDTensor. Default: False. + Returns: + list: output of models. + + Examples: + .. 
code-block:: python + + # declarative mode + import numpy as np + from paddle.incubate.hapi.metrics import Accuracy + from paddle.incubate.hapi.datasets import MNIST + from paddle.incubate.hapi.vision.transforms import Compose,Resize + from paddle.incubate.hapi.vision.models import LeNet + from paddle.incubate.hapi.model import Input, set_device + + class MnistDataset(MNIST): + def __init__(self, mode, return_label=True): + super(MnistDataset, self).__init__(mode=mode) + self.return_label = return_label + + def __getitem__(self, idx): + img = np.reshape(self.images[idx], [1, 28, 28]) + if self.return_label: + return img, np.array(self.labels[idx]).astype('int64') + return img, + + def __len__(self): + return len(self.images) + + inputs = [Input([-1, 1, 28, 28], 'float32', name='image')] + + test_dataset = MnistDataset(mode='test', return_label=False) + + model = LeNet() + model.prepare(inputs=inputs) + + result = model.predict(test_dataset, batch_size=64) + print(result) + + # imperative mode + import paddle.fluid.dygraph as dg + place = set_device('cpu') + with dg.guard(place) as g: + model = LeNet() + model.prepare(inputs=inputs) + + result = model.predict(test_dataset, batch_size=64) + print(result) + """ + + if test_data is not None and isinstance(test_data, Dataset): + test_sampler = DistributedBatchSampler( + test_data, batch_size=batch_size) + test_loader = DataLoader( + test_data, + batch_sampler=test_sampler, + places=self._place, + num_workers=num_workers, + return_list=True) + else: + test_loader = test_data + + self._test_dataloader = test_loader + + cbks = config_callbacks(callbacks, model=self, verbose=1) + + test_steps = self._len_data_loader(test_loader) + logs = {'steps': test_steps} + + cbks.on_begin('test', logs) + + outputs = [] + + logs, outputs = self._run_one_epoch(test_loader, cbks, 'test') + + outputs = list(zip(*outputs)) + + # NOTE: for lod tensor output, we should not stack outputs + # for stacking may lose its detail info + if 
stack_outputs: + outputs = [np.vstack(outs) for outs in outputs] + + self._test_dataloader = None + + cbks.on_end('test', logs) + return outputs + + def save_inference_model(self, + save_dir, + model_filename=None, + params_filename=None, + model_only=False): + """ + Save inference model must in static mode. + + Args: + save_dir (str): The directory path to save the inference model. + model_filename (str|None): The name of file to save the inference + model itself. If is set None, a default filename + :code:`__model__` will be used. + params_filename (str|None): The name of file to save all related + parameters. If it is set None, parameters will be saved + in separate files . + model_only (bool): If True, It will save inference model only, + and do not save parameters. Default: False. + + Returns: + list: The fetch variables' name list + """ + assert not fluid.in_dygraph_mode( + ), 'Save inference model must in static mode!' + + prog = self._adapter._progs.get('test', None) + assert prog, \ + "Model is not ready, please call `model.prepare()` first" + + infer_prog = prog.clone(for_test=True) + + input_names = [v.name for v in self._adapter._input_vars['test']] + endpoints = self._adapter._endpoints['test']['output'] + + return fluid.io.save_inference_model( + save_dir, + input_names, + endpoints, + self._adapter._executor, + main_program=infer_prog, + model_filename=model_filename, + params_filename=params_filename, + program_only=model_only) + + def _run_one_epoch(self, data_loader, callbacks, mode, logs={}): + outputs = [] + for step, data in enumerate(data_loader): + # data might come from different types of data_loader and have + # different format, as following: + # 1. DataLoader in static graph: + # [[input1, input2, ..., label1, lable2, ...]] + # 2. DataLoader in dygraph + # [input1, input2, ..., label1, lable2, ...] + # 3. custumed iterator yield concated inputs and labels: + # [input1, input2, ..., label1, lable2, ...] + # 4. 
custumed iterator yield seperated inputs and labels: + # ([input1, input2, ...], [label1, lable2, ...]) + # To handle all of these, flatten (nested) list to list. + data = flatten(data) + # LoDTensor.shape is callable, where LoDTensor comes from + # DataLoader in static graph + batch_size = data[0].shape()[0] if callable(data[ + 0].shape) else data[0].shape[0] + + callbacks.on_batch_begin(mode, step, logs) + + if mode != 'test': + outs = getattr(self, mode + '_batch')(data[:len(self._inputs)], + data[len(self._inputs):]) + # losses + loss = outs[0] if self._metrics else outs + metrics = [[l[0] for l in loss]] + + # metrics + for metric in self._metrics: + res = metric.accumulate() + metrics.extend(to_list(res)) + + assert len(self._metrics_name()) == len(metrics) + for k, v in zip(self._metrics_name(), metrics): + logs[k] = v + else: + outs = getattr(self, mode + '_batch')(data) + outputs.append(outs) + + logs['step'] = step + if mode == 'train' or self._adapter._merge_count.get( + mode + '_batch', 0) <= 0: + logs['batch_size'] = batch_size * ParallelEnv().nranks + else: + logs['batch_size'] = self._adapter._merge_count[mode + '_batch'] + + callbacks.on_batch_end(mode, step, logs) + self._reset_metrics() + + if mode == 'test': + return logs, outputs + return logs + + def _reset_metrics(self): + for metric in self._metrics: + metric.reset() + + def _metrics_name(self): + metrics_name = ['loss'] + for m in self._metrics: + metrics_name.extend(to_list(m.name())) + return metrics_name + + def _len_data_loader(self, data_loader): + try: + steps = len(data_loader) + except Exception: + steps = None + return steps diff --git a/python/paddle/incubate/hapi/progressbar.py b/python/paddle/incubate/hapi/progressbar.py new file mode 100644 index 0000000000000000000000000000000000000000..2487fcbde8744fa7cc186e16b0653f03629d0366 --- /dev/null +++ b/python/paddle/incubate/hapi/progressbar.py @@ -0,0 +1,192 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
class ProgressBar(object):
    """Text progress reporter that writes to a file-like object.

    With ``verbose == 1`` each update redraws a ``step i/n [===>...]`` bar
    followed by the metric values, an ETA and the time per step. With
    ``verbose == 2`` each update writes one plain line. Any other value
    makes ``update`` write nothing.

    Args:
        num (int|None): Total number of steps, or None when unknown.
        width (int): Requested bar width; capped by ``_get_max_width()``.
        verbose (int): Display mode, see above.
        start (bool): Start the timer at construction. When False,
            ``start()`` must be called before ``update()``.
        file: Stream with ``write``/``flush`` that receives all output.

    Raises:
        TypeError: If ``num`` is an int that is not positive.
    """

    def __init__(self,
                 num=None,
                 width=30,
                 verbose=1,
                 start=True,
                 file=sys.stdout):
        self._num = num
        if isinstance(num, int) and num <= 0:
            raise TypeError('num should be None or integer (> 0)')
        max_width = self._get_max_width()
        self._width = width if width <= max_width else max_width
        self._total_width = 0
        self._verbose = verbose
        self.file = file
        self._values = {}
        self._values_order = []
        if start:
            self._start = time.time()
        self._last_update = 0

        # Whether the previous bar can be overwritten in place (\b, \r)
        # instead of starting a new line for every update.
        self._dynamic_display = (
            (hasattr(self.file, 'isatty') and
             self.file.isatty()) or 'ipykernel' in sys.modules or
            'posix' in sys.modules or 'PYCHARM_HOSTED' in os.environ)

    def _get_max_width(self):
        """Return the widest bar allowed for the current terminal size."""
        if sys.version_info > (3, 3):
            from shutil import get_terminal_size
        else:
            try:
                from backports.shutil_get_terminal_size import get_terminal_size
            except ImportError:
                # Only a missing backport should trigger the fallback;
                # other errors must not be hidden.

                def get_terminal_size():
                    terminal_size = namedtuple("terminal_size", "columns lines")
                    return terminal_size(80, 24)

        terminal_width, _ = get_terminal_size()
        max_width = min(int(terminal_width * 0.6), terminal_width - 50)
        return max_width

    @staticmethod
    def _format_values(values):
        """Render ``(name, value)`` pairs as `` - name: v1 v2 ...`` text.

        Floats (and size-1 float arrays) use 4 decimals, switching to
        scientific notation when the magnitude is at most 1e-3; any other
        value is rendered with ``%s``.
        """
        text = ''
        for k, val in values:
            text += ' - %s:' % k
            val = val if isinstance(val, list) else [val]
            for v in val:
                if isinstance(v, np.ndarray) and v.size == 1 and \
                        v.dtype in [np.float32, np.float64]:
                    # Unwrap a one-element array so it prints as a number.
                    v = float(v.item())
                if isinstance(v, (float, np.float32, np.float64)):
                    if abs(v) > 1e-3:
                        text += ' %.4f' % v
                    else:
                        text += ' %.4e' % v
                else:
                    text += ' %s' % v
        return text

    def start(self):
        """Flush the output stream and (re)start the timer."""
        self.file.flush()
        self._start = time.time()

    def update(self, current_num, values=None):
        """Report progress for step ``current_num``.

        Args:
            current_num (int): Number of steps finished so far.
            values (list|None): ``(name, value)`` pairs to display; a value
                may be a single item or a list of items. None means no
                values.
        """
        now = time.time()
        if values is None:
            values = []

        if current_num:
            time_per_unit = (now - self._start) / current_num
        else:
            time_per_unit = 0

        if time_per_unit >= 1 or time_per_unit == 0:
            fps = ' - %.0fs/%s' % (time_per_unit, 'step')
        elif time_per_unit >= 1e-3:
            fps = ' - %.0fms/%s' % (time_per_unit * 1e3, 'step')
        else:
            fps = ' - %.0fus/%s' % (time_per_unit * 1e6, 'step')

        # All output goes to the stream given at construction, not to
        # sys.stdout directly.
        out = self.file

        info = ''
        if self._verbose == 1:
            prev_total_width = self._total_width

            if self._dynamic_display:
                out.write('\b' * prev_total_width)
                out.write('\r')
            else:
                out.write('\n')

            if self._num is not None:
                numdigits = int(np.log10(self._num)) + 1

                bar_chars = ('step %' + str(numdigits) + 'd/%d [') % (
                    current_num, self._num)
                prog = float(current_num) / self._num
                prog_width = int(self._width * prog)

                if prog_width > 0:
                    bar_chars += ('=' * (prog_width - 1))
                    if current_num < self._num:
                        bar_chars += '>'
                    else:
                        bar_chars += '='
                bar_chars += ('.' * (self._width - prog_width))
                bar_chars += ']'
            else:
                bar_chars = 'step %3d' % current_num

            self._total_width = len(bar_chars)
            out.write(bar_chars)

            info += self._format_values(values)

            if self._num is not None and current_num < self._num:
                eta = time_per_unit * (self._num - current_num)
                if eta > 3600:
                    eta_format = '%d:%02d:%02d' % (eta // 3600,
                                                   (eta % 3600) // 60, eta % 60)
                elif eta > 60:
                    eta_format = '%d:%02d' % (eta // 60, eta % 60)
                else:
                    eta_format = '%ds' % eta

                info += ' - ETA: %s' % eta_format

            info += fps
            self._total_width += len(info)
            # Pad with spaces so leftovers of a longer previous line are
            # overwritten when redrawing in place.
            if prev_total_width > self._total_width:
                info += (' ' * (prev_total_width - self._total_width))

            # newline for another epoch
            if self._num is not None and current_num >= self._num:
                info += '\n'
            if self._num is None:
                info += '\n'

            out.write(info)
            out.flush()
            self._last_update = now
        elif self._verbose == 2:
            if self._num:
                numdigits = int(np.log10(self._num)) + 1
                count = ('step %' + str(numdigits) + 'd/%d') % (current_num,
                                                                self._num)
            else:
                count = 'step %3d' % current_num
            info = count + info

            info += self._format_values(values)

            info += fps
            info += '\n'
            out.write(info)
            out.flush()
# Every test_*.py in this directory becomes a test name (file name without
# the .py suffix).
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")

# test_dist_*.py files are collected separately so they can be registered
# through py_dist_test below.
file(GLOB DIST_TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_dist_*.py")
string(REPLACE ".py" "" DIST_TEST_OPS "${DIST_TEST_OPS}")


# Drop the distributed tests from the regular list so they are not
# registered twice.
foreach(TEST_OP ${DIST_TEST_OPS})
  list(REMOVE_ITEM TEST_OPS ${TEST_OP})
endforeach()

foreach(src ${TEST_OPS})
  py_test(${src} SRCS ${src}.py)
endforeach()


# py_dist_test(<name> SRCS <file> [DEPS ...] [ARGS ...] [ENVS ...])
# Registers <name> as a serial test labelled RUN_TYPE=DIST with a 600 s
# timeout. The test is only added when WITH_TESTING, WITH_COVERAGE,
# WITH_GPU and WITH_NCCL are all enabled and the platform is not Windows;
# otherwise the call is a no-op.
function(py_dist_test TARGET_NAME)
  if(WITH_TESTING)
    set(options "")
    set(oneValueArgs "")
    set(multiValueArgs SRCS DEPS ARGS ENVS)
    cmake_parse_arguments(py_dist_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

    if(WITH_COVERAGE AND WITH_GPU AND WITH_NCCL AND NOT WIN32)
      add_test(NAME ${TARGET_NAME}
               COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
               FLAGS_cpu_deterministic=true NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1
               PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_dist_test_ENVS}
               COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data
               ${PYTHON_EXECUTABLE} -u ${py_dist_test_SRCS} ${py_dist_test_ARGS}
               WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
      # No unit test should exceed 10 minutes.
      set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=DIST" RUN_SERIAL TRUE)
    endif()


  endif()
endfunction()



# Register every distributed test, printing its name at configure time.
foreach(src ${DIST_TEST_OPS})
  message(STATUS ${src})
  py_dist_test(${src} SRCS ${src}.py)
endforeach()
+ +from __future__ import division +from __future__ import print_function + +import unittest + +import numpy as np +import contextlib + +from paddle import fluid + +from paddle.incubate.hapi.model import Model, Input, set_device +from paddle.incubate.hapi.loss import CrossEntropy +from paddle.incubate.hapi.vision.models import LeNet +from paddle.incubate.hapi.metrics import Accuracy +from paddle.incubate.hapi.callbacks import ProgBarLogger +from paddle.incubate.hapi.datasets import MNIST + + +class MnistDataset(MNIST): + def __init__(self, mode, return_label=True): + super(MnistDataset, self).__init__(mode=mode) + self.return_label = return_label + + def __getitem__(self, idx): + img = np.reshape(self.images[idx], [1, 28, 28]) + if self.return_label: + return img, np.array(self.labels[idx]).astype('int64') + return img, + + def __len__(self): + return len(self.images) + + +def compute_accuracy(pred, gt): + pred = np.argmax(pred, -1) + gt = np.array(gt) + + correct = pred[:, np.newaxis] == gt + + return np.sum(correct) / correct.shape[0] + + +@unittest.skipIf(not fluid.is_compiled_with_cuda(), + 'CPU testing is not supported') +class TestDistTraning(unittest.TestCase): + def test_static_multiple_gpus(self): + device = set_device('gpu') + + fluid.enable_dygraph(device) + im_shape = (-1, 1, 28, 28) + batch_size = 128 + + inputs = [Input(im_shape, 'float32', name='image')] + labels = [Input([None, 1], 'int64', name='label')] + + train_dataset = MnistDataset(mode='train') + val_dataset = MnistDataset(mode='test') + test_dataset = MnistDataset(mode='test', return_label=False) + + model = LeNet() + optim = fluid.optimizer.Momentum( + learning_rate=0.001, momentum=.9, parameter_list=model.parameters()) + loss = CrossEntropy() + model.prepare(optim, loss, Accuracy(), inputs, labels, device=device) + cbk = ProgBarLogger(50) + + model.fit(train_dataset, + val_dataset, + epochs=2, + batch_size=batch_size, + callbacks=cbk) + + eval_result = model.evaluate(val_dataset, 
batch_size=batch_size) + + output = model.predict( + test_dataset, batch_size=batch_size, stack_outputs=True) + + np.testing.assert_equal(output[0].shape[0], len(test_dataset)) + + acc = compute_accuracy(output[0], val_dataset.labels) + + np.testing.assert_allclose(acc, eval_result['acc']) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/incubate/hapi/tests/dist_hapi_mnist_static.py b/python/paddle/incubate/hapi/tests/dist_hapi_mnist_static.py new file mode 100644 index 0000000000000000000000000000000000000000..31ba9104b7106c16a232084ba6d99316d0b65475 --- /dev/null +++ b/python/paddle/incubate/hapi/tests/dist_hapi_mnist_static.py @@ -0,0 +1,99 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import division +from __future__ import print_function + +import unittest + +import numpy as np +import contextlib + +from paddle import fluid + +from paddle.incubate.hapi.model import Model, Input, set_device +from paddle.incubate.hapi.loss import CrossEntropy +from paddle.incubate.hapi.vision.models import LeNet +from paddle.incubate.hapi.metrics import Accuracy +from paddle.incubate.hapi.callbacks import ProgBarLogger +from paddle.incubate.hapi.datasets import MNIST + + +class MnistDataset(MNIST): + def __init__(self, mode, return_label=True): + super(MnistDataset, self).__init__(mode=mode) + self.return_label = return_label + + def __getitem__(self, idx): + img = np.reshape(self.images[idx], [1, 28, 28]) + if self.return_label: + return img, np.array(self.labels[idx]).astype('int64') + return img, + + def __len__(self): + return len(self.images) + + +def compute_accuracy(pred, gt): + pred = np.argmax(pred, -1) + gt = np.array(gt) + + correct = pred[:, np.newaxis] == gt + + return np.sum(correct) / correct.shape[0] + + +@unittest.skipIf(not fluid.is_compiled_with_cuda(), + 'CPU testing is not supported') +class TestDistTraning(unittest.TestCase): + def test_static_multiple_gpus(self): + device = set_device('gpu') + + im_shape = (-1, 1, 28, 28) + batch_size = 128 + + inputs = [Input(im_shape, 'float32', name='image')] + labels = [Input([None, 1], 'int64', name='label')] + + train_dataset = MnistDataset(mode='train') + val_dataset = MnistDataset(mode='test') + test_dataset = MnistDataset(mode='test', return_label=False) + + model = LeNet() + optim = fluid.optimizer.Momentum( + learning_rate=0.001, momentum=.9, parameter_list=model.parameters()) + loss = CrossEntropy() + model.prepare(optim, loss, Accuracy(), inputs, labels, device=device) + cbk = ProgBarLogger(50) + + model.fit(train_dataset, + val_dataset, + epochs=2, + batch_size=batch_size, + callbacks=cbk) + + eval_result = model.evaluate(val_dataset, batch_size=batch_size) + + output = 
model.predict( + test_dataset, batch_size=batch_size, stack_outputs=True) + + np.testing.assert_equal(output[0].shape[0], len(test_dataset)) + + acc = compute_accuracy(output[0], val_dataset.labels) + + np.testing.assert_allclose(acc, eval_result['acc']) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/incubate/hapi/tests/test_callbacks.py b/python/paddle/incubate/hapi/tests/test_callbacks.py new file mode 100644 index 0000000000000000000000000000000000000000..d8630038cd87f4fa1cd864d7b0eeffa6e4b2b8c2 --- /dev/null +++ b/python/paddle/incubate/hapi/tests/test_callbacks.py @@ -0,0 +1,106 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import time +import random +import tempfile +import shutil + +from paddle.incubate.hapi.model import Input +from paddle.incubate.hapi.vision.models import LeNet +from paddle.incubate.hapi.callbacks import config_callbacks + + +class TestCallbacks(unittest.TestCase): + def setUp(self): + self.save_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.save_dir) + + def run_callback(self): + epochs = 2 + steps = 50 + freq = 2 + eval_steps = 20 + + lenet = LeNet() + inputs = [Input([None, 1, 28, 28], 'float32', name='image')] + lenet.prepare(inputs=inputs) + + cbks = config_callbacks( + model=lenet, + batch_size=128, + epochs=epochs, + steps=steps, + log_freq=freq, + verbose=self.verbose, + metrics=['loss', 'acc'], + save_dir=self.save_dir) + cbks.on_begin('train') + + logs = {'loss': 50.341673, 'acc': 0.00256} + for epoch in range(epochs): + cbks.on_epoch_begin(epoch) + for step in range(steps): + cbks.on_batch_begin('train', step, logs) + logs['loss'] -= random.random() * 0.1 + logs['acc'] += random.random() * 0.1 + time.sleep(0.005) + cbks.on_batch_end('train', step, logs) + cbks.on_epoch_end(epoch, logs) + + eval_logs = {'eval_loss': 20.341673, 'eval_acc': 0.256} + params = { + 'steps': eval_steps, + 'metrics': ['eval_loss', 'eval_acc'], + } + cbks.on_begin('eval', params) + for step in range(eval_steps): + cbks.on_batch_begin('eval', step, eval_logs) + eval_logs['eval_loss'] -= random.random() * 0.1 + eval_logs['eval_acc'] += random.random() * 0.1 + eval_logs['batch_size'] = 2 + time.sleep(0.005) + cbks.on_batch_end('eval', step, eval_logs) + cbks.on_end('eval', eval_logs) + + test_logs = {} + params = {'steps': eval_steps} + cbks.on_begin('test', params) + for step in range(eval_steps): + cbks.on_batch_begin('test', step, test_logs) + test_logs['batch_size'] = 2 + time.sleep(0.005) + cbks.on_batch_end('test', step, test_logs) + cbks.on_end('test', test_logs) + + cbks.on_end('train') + + def test_callback_verbose_0(self): + 
self.verbose = 0 + self.run_callback() + + def test_callback_verbose_1(self): + self.verbose = 1 + self.run_callback() + + def test_callback_verbose_2(self): + self.verbose = 2 + self.run_callback() + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/incubate/hapi/tests/test_datasets.py b/python/paddle/incubate/hapi/tests/test_datasets.py new file mode 100644 index 0000000000000000000000000000000000000000..7f544e5ad84d5aa2041e8fdb6c1ac77cc34d8164 --- /dev/null +++ b/python/paddle/incubate/hapi/tests/test_datasets.py @@ -0,0 +1,159 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import os +import numpy as np +import tempfile +import shutil +import cv2 + +from paddle.incubate.hapi.datasets import * +from paddle.incubate.hapi.datasets.utils import _check_exists_and_download + + +class TestFolderDatasets(unittest.TestCase): + def setUp(self): + self.data_dir = tempfile.mkdtemp() + self.empty_dir = tempfile.mkdtemp() + for i in range(2): + sub_dir = os.path.join(self.data_dir, 'class_' + str(i)) + if not os.path.exists(sub_dir): + os.makedirs(sub_dir) + for j in range(2): + fake_img = (np.random.random((32, 32, 3)) * 255).astype('uint8') + cv2.imwrite(os.path.join(sub_dir, str(j) + '.jpg'), fake_img) + + def tearDown(self): + shutil.rmtree(self.data_dir) + + def test_dataset(self): + dataset_folder = DatasetFolder(self.data_dir) + + for _ in dataset_folder: + pass + + assert len(dataset_folder) == 4 + assert len(dataset_folder.classes) == 2 + + dataset_folder = DatasetFolder(self.data_dir) + for _ in dataset_folder: + pass + + def test_folder(self): + loader = ImageFolder(self.data_dir) + + for _ in loader: + pass + + loader = ImageFolder(self.data_dir) + for _ in loader: + pass + + assert len(loader) == 4 + + def test_transform(self): + def fake_transform(img): + return img + + transfrom = fake_transform + dataset_folder = DatasetFolder(self.data_dir, transform=transfrom) + + for _ in dataset_folder: + pass + + loader = ImageFolder(self.data_dir, transform=transfrom) + for _ in loader: + pass + + def test_errors(self): + with self.assertRaises(RuntimeError): + ImageFolder(self.empty_dir) + with self.assertRaises(RuntimeError): + DatasetFolder(self.empty_dir) + + with self.assertRaises(ValueError): + _check_exists_and_download('temp_paddle', None, None, None, False) + + +class TestMNISTTest(unittest.TestCase): + def test_main(self): + mnist = MNIST(mode='test') + self.assertTrue(len(mnist) == 10000) + + for i in range(len(mnist)): + image, label = mnist[i] + self.assertTrue(image.shape[0] == 1) + 
self.assertTrue(image.shape[1] == 28) + self.assertTrue(image.shape[2] == 28) + self.assertTrue(label.shape[0] == 1) + self.assertTrue(0 <= int(label) <= 9) + + +class TestMNISTTrain(unittest.TestCase): + def test_main(self): + mnist = MNIST(mode='train', chw_format=False) + self.assertTrue(len(mnist) == 60000) + + for i in range(len(mnist)): + image, label = mnist[i] + self.assertTrue(image.shape[0] == 784) + self.assertTrue(label.shape[0] == 1) + self.assertTrue(0 <= int(label) <= 9) + + +class TestFlowersTrain(unittest.TestCase): + def test_main(self): + flowers = Flowers(mode='train') + self.assertTrue(len(flowers) == 6149) + + # traversing the whole dataset may take a + # long time, so randomly check 1 sample + idx = np.random.randint(0, 6149) + image, label = flowers[idx] + self.assertTrue(len(image.shape) == 3) + self.assertTrue(image.shape[2] == 3) + self.assertTrue(label.shape[0] == 1) + + +class TestFlowersValid(unittest.TestCase): + def test_main(self): + flowers = Flowers(mode='valid') + self.assertTrue(len(flowers) == 1020) + + # traversing the whole dataset may take a + # long time, so randomly check 1 sample + idx = np.random.randint(0, 1020) + image, label = flowers[idx] + self.assertTrue(len(image.shape) == 3) + self.assertTrue(image.shape[2] == 3) + self.assertTrue(label.shape[0] == 1) + + +class TestFlowersTest(unittest.TestCase): + def test_main(self): + flowers = Flowers(mode='test') + self.assertTrue(len(flowers) == 1020) + + # traversing the whole dataset may take a + # long time, so randomly check 1 sample + idx = np.random.randint(0, 1020) + image, label = flowers[idx] + self.assertTrue(len(image.shape) == 3) + self.assertTrue(image.shape[2] == 3) + self.assertTrue(label.shape[0] == 1) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/incubate/hapi/tests/test_dist_hapi_model.py b/python/paddle/incubate/hapi/tests/test_dist_hapi_model.py new file mode 100644 index
0000000000000000000000000000000000000000..e75e08e3749e6ce629e88c486e4f87d9109dc709 --- /dev/null +++ b/python/paddle/incubate/hapi/tests/test_dist_hapi_model.py @@ -0,0 +1,130 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import os +import time +import copy +import subprocess +import paddle.fluid as fluid + +from paddle.distributed.utils import find_free_ports, watch_local_trainers, get_cluster, TrainerProc + + +def get_cluster_from_args(selected_gpus): + cluster_node_ips = '127.0.0.1' + node_ip = '127.0.0.1' + + node_ips = [x.strip() for x in cluster_node_ips.split(',')] + + node_ips.index(node_ip) + + free_ports = None + + free_ports = find_free_ports(len(selected_gpus)) + if free_ports is not None: + free_ports = list(free_ports) + return get_cluster(node_ips, node_ip, free_ports, selected_gpus) + + +def get_gpus(selected_gpus): + selected_gpus = [x.strip() for x in selected_gpus.split(',')] + return selected_gpus + + +def start_local_trainers(cluster, + pod, + training_script, + training_script_args, + log_dir=None): + current_env = copy.copy(os.environ.copy()) + # Paddle broadcasts the ncclUniqueId over a socket, and a + # proxy may make trainers unreachable, so remove the proxy variables. + # If we set them to "", grpc will log the error message "bad uri", + # so just delete them.
+ current_env.pop("http_proxy", None) + current_env.pop("https_proxy", None) + + procs = [] + for t in pod.trainers: + proc_env = { + "FLAGS_selected_gpus": "%s" % ",".join([str(g) for g in t.gpus]), + "PADDLE_TRAINER_ID": "%d" % t.rank, + "PADDLE_CURRENT_ENDPOINT": "%s" % t.endpoint, + "PADDLE_TRAINERS_NUM": "%d" % cluster.trainers_nranks(), + "PADDLE_TRAINER_ENDPOINTS": ",".join(cluster.trainers_endpoints()) + } + + current_env.update(proc_env) + + print("trainer proc env:{}".format(current_env)) + + if os.getenv('WITH_COVERAGE', 'OFF') == 'ON': + cmd = "python -m coverage run --branch -p " + training_script + else: + cmd = "python -u " + training_script + + print("start trainer proc:{} env:{}".format(cmd, proc_env)) + + fn = None + + proc = subprocess.Popen(cmd.split(" "), env=current_env) + + tp = TrainerProc() + tp.proc = proc + tp.rank = t.rank + tp.log_fn = fn + tp.cmd = cmd + + procs.append(tp) + + return procs + + +class TestMultipleGpus(unittest.TestCase): + def run_mnist_2gpu(self, target_file_name): + if fluid.core.get_cuda_device_count() == 0: + return + + selected_gpus = get_gpus('0,1') + cluster = None + pod = None + + cluster, pod = get_cluster_from_args(selected_gpus) + + procs = start_local_trainers( + cluster, + pod, + training_script=target_file_name, + training_script_args=[]) + + while True: + alive = watch_local_trainers(procs, cluster.trainers_nranks()) + + if not alive: + print("Local procs complete, POD info:{}".format(pod)) + break + time.sleep(3) + + def test_hapi_multiple_gpus_static(self): + self.run_mnist_2gpu('dist_hapi_mnist_static.py') + + def test_hapi_multiple_gpus_dynamic(self): + self.run_mnist_2gpu('dist_hapi_mnist_dynamic.py') + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/incubate/hapi/tests/test_download.py b/python/paddle/incubate/hapi/tests/test_download.py new file mode 100644 index 0000000000000000000000000000000000000000..9b43b9a58dc4eff633d03ad1cc5ed4274f0f3c76 --- /dev/null +++ 
b/python/paddle/incubate/hapi/tests/test_download.py @@ -0,0 +1,50 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +from paddle.incubate.hapi.download import get_weights_path_from_url + + +class TestDownload(unittest.TestCase): + def download(self, url, md5sum): + get_weights_path_from_url(url, md5sum) + + def test_download_model(self): + url = 'https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams' + md5sum = '8ff74f291f72533f2a7956a4efff9d88' + self.download(url, md5sum) + + def test_exist_download(self): + url = 'https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams' + md5sum = '8ff74f291f72533f2a7956a4efff9d88' + self.download(url, md5sum) + + def test_download_without_md5sum(self): + url = 'https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams' + self.download(url, None) + + def test_download_errors(self): + with self.assertRaises(RuntimeError): + url = 'https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0t.pdparams' + md5sum = '8ff74f291f72533f2a7956a4eftttttt' + self.download(url, md5sum) + + with self.assertRaises(RuntimeError): + url = 'https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0t.pdparams' + self.download(url, None) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/incubate/hapi/tests/test_logger.py b/python/paddle/incubate/hapi/tests/test_logger.py new file mode 100644 index 
0000000000000000000000000000000000000000..f25d0ee4f7e2f0db1031f1f2884fb6df338003cc --- /dev/null +++ b/python/paddle/incubate/hapi/tests/test_logger.py @@ -0,0 +1,49 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import division +from __future__ import print_function + +import unittest +import os +import numpy as np +import shutil +import tempfile + +from paddle.incubate.hapi.logger import setup_logger + + +class TestSetupLogger(unittest.TestCase): + def setUp(self): + self.save_dir = tempfile.mkdtemp() + self.save_file = os.path.join(self.save_dir, 'logger.txt') + + def tearDown(self): + shutil.rmtree(self.save_dir) + + def logger(self, output=None): + setup_logger(output=output) + + def test_logger_no_output(self): + self.logger() + + def test_logger_dir(self): + self.logger(self.save_dir) + + def test_logger_file(self): + self.logger(self.save_file) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/incubate/hapi/tests/test_loss.py b/python/paddle/incubate/hapi/tests/test_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..f729b38b81f333c6d871fc2e21c1cea988d78437 --- /dev/null +++ b/python/paddle/incubate/hapi/tests/test_loss.py @@ -0,0 +1,111 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import division +from __future__ import print_function + +import unittest +import os +import six +import numpy as np +import shutil +import copy + +import paddle +from paddle import fluid + +from paddle.incubate.hapi.model import Model, Input +from paddle.incubate.hapi.loss import CrossEntropy, SoftmaxWithCrossEntropy + + +def stable_softmax(x): + """Compute the softmax of vector x in a numerically stable way.""" + # clip to shiftx, otherwise, when calc loss with + # log(exp(shiftx)), may get log(0)=INF + shiftx = (x - np.max(x)).clip(-64.) 
+ exps = np.exp(shiftx) + return exps / np.sum(exps) + + +def randomize_probability(batch_size, class_num, dtype='float32'): + prob = np.random.uniform( + 0.1, 1.0, size=(batch_size, class_num)).astype(dtype) + prob_sum = prob.sum(axis=1) + for i in six.moves.xrange(len(prob)): + prob[i] /= prob_sum[i] + return prob + + +def numpy_ce(x, label): + return np.asmatrix( + [[-np.log(x[i][label[i][0]])] for i in range(x.shape[0])], + dtype="float32").mean() + + +class TestLoss(unittest.TestCase): + def test_cross_entropy(self): + class_num = 100 + batch_size = 128 + inputs = [randomize_probability(128, class_num) for _ in range(2)] + + labels = [ + np.random.randint( + 0, class_num, (batch_size, 1), dtype="int64") for _ in range(2) + ] + + gt_out = [numpy_ce(inputs[i], labels[i]) for i in range(2)] + + fluid.enable_dygraph() + cross_entropy = CrossEntropy() + out = cross_entropy( + [fluid.dygraph.to_variable(x) for x in inputs], + [fluid.dygraph.to_variable(label) for label in labels]) + out = [o.numpy() for o in out] + + for o, g in zip(out, gt_out): + np.testing.assert_allclose(o, g, atol=1e-5) + + def test_soft_cross_entronpy(self): + class_num = 100 + batch_size = 128 + + inputs = [randomize_probability(128, class_num) for _ in range(2)] + + labels = [ + np.random.randint( + 0, class_num, (batch_size, 1), dtype="int64") for _ in range(2) + ] + + fluid.enable_dygraph() + softmax_cross_entropy = SoftmaxWithCrossEntropy() + + softmax_cross_entropy( + [fluid.dygraph.to_variable(x) for x in inputs], + [fluid.dygraph.to_variable(label) for label in labels]) + + softmax_cross_entropy = SoftmaxWithCrossEntropy(average=False) + + inputs = [randomize_probability(128, class_num)] + + labels = [ + np.random.randint( + 0, class_num, (batch_size, 1), dtype="int64") + ] + + softmax_cross_entropy([fluid.dygraph.to_variable(x) for x in inputs], + fluid.dygraph.to_variable(labels[0])) + + +if __name__ == '__main__': + unittest.main() diff --git 
a/python/paddle/incubate/hapi/tests/test_metrics.py b/python/paddle/incubate/hapi/tests/test_metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..3d25a275d5f1c539ce959c5231a7af771b229836 --- /dev/null +++ b/python/paddle/incubate/hapi/tests/test_metrics.py @@ -0,0 +1,132 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import division +from __future__ import print_function + +import os +import unittest +import numpy as np + +import paddle.fluid as fluid +from paddle.fluid.dygraph.base import to_variable + +from paddle.incubate.hapi.metrics import * +from paddle.incubate.hapi.utils import to_list + + +def accuracy(pred, label, topk=(1, )): + maxk = max(topk) + pred = np.argsort(pred)[:, ::-1][:, :maxk] + correct = (pred == np.repeat(label, maxk, 1)) + + batch_size = label.shape[0] + res = [] + for k in topk: + correct_k = correct[:, :k].sum() + res.append(correct_k / batch_size) + return res + + +def convert_to_one_hot(y, C): + oh = np.random.random((y.shape[0], C)).astype('float32') * .5 + for i in range(y.shape[0]): + oh[i, int(y[i])] = 1. 
+ return oh + + +class TestAccuracyDynamic(unittest.TestCase): + def setUp(self): + self.topk = (1, ) + self.class_num = 5 + self.sample_num = 1000 + self.name = None + + def random_pred_label(self): + label = np.random.randint(0, self.class_num, + (self.sample_num, 1)).astype('int64') + pred = np.random.randint(0, self.class_num, + (self.sample_num, 1)).astype('int32') + pred_one_hot = convert_to_one_hot(pred, self.class_num) + pred_one_hot = pred_one_hot.astype('float32') + + return label, pred_one_hot + + def test_main(self): + with fluid.dygraph.guard(fluid.CPUPlace()): + acc = Accuracy(topk=self.topk, name=self.name) + for _ in range(10): + label, pred = self.random_pred_label() + label_var = to_variable(label) + pred_var = to_variable(pred) + state = to_list(acc.add_metric_op(pred_var, label_var)) + acc.update(* [s.numpy() for s in state]) + res_m = acc.accumulate() + res_f = accuracy(pred, label, self.topk) + assert np.all(np.isclose(np.array(res_m, dtype='float64'), np.array(res_f, dtype='float64'), rtol=1e-3)), \ + "Accuracy precision error: {} != {}".format(res_m, res_f) + acc.reset() + assert np.sum(acc.total) == 0 + assert np.sum(acc.count) == 0 + + +class TestAccuracyDynamicMultiTopk(TestAccuracyDynamic): + def setUp(self): + self.topk = (1, 5) + self.class_num = 10 + self.sample_num = 1000 + self.name = "accuracy" + + +class TestAccuracyStatic(TestAccuracyDynamic): + def test_main(self): + main_prog = fluid.Program() + startup_prog = fluid.Program() + with fluid.program_guard(main_prog, startup_prog): + pred = fluid.data( + name='pred', shape=[None, self.class_num], dtype='float32') + label = fluid.data(name='label', shape=[None, 1], dtype='int64') + acc = Accuracy(topk=self.topk, name=self.name) + state = acc.add_metric_op(pred, label) + + exe = fluid.Executor(fluid.CPUPlace()) + compiled_main_prog = fluid.CompiledProgram(main_prog) + + for _ in range(10): + label, pred = self.random_pred_label() + state_ret = exe.run(compiled_main_prog, + 
feed={'pred': pred, + 'label': label}, + fetch_list=[s.name for s in to_list(state)], + return_numpy=True) + acc.update(*state_ret) + res_m = acc.accumulate() + res_f = accuracy(pred, label, self.topk) + assert np.all(np.isclose(np.array(res_m, dtype='float64'), np.array(res_f, dtype='float64'), rtol=1e-3)), \ + "Accuracy precision error: {} != {}".format(res_m, res_f) + acc.reset() + assert np.sum(acc.total) == 0 + assert np.sum(acc.count) == 0 + + +class TestAccuracyStaticMultiTopk(TestAccuracyStatic): + def setUp(self): + self.topk = (1, 5) + self.class_num = 10 + self.sample_num = 1000 + self.name = "accuracy" + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/incubate/hapi/tests/test_model.py b/python/paddle/incubate/hapi/tests/test_model.py new file mode 100644 index 0000000000000000000000000000000000000000..e49ec5651ff160fce0c80e8804b55b18baadd3c3 --- /dev/null +++ b/python/paddle/incubate/hapi/tests/test_model.py @@ -0,0 +1,483 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +from __future__ import division +from __future__ import print_function + +import unittest + +import os +import numpy as np +import shutil +import tempfile + +from paddle import fluid +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear +from paddle.fluid.dygraph.container import Sequential +from paddle.fluid.dygraph.base import to_variable + +from paddle.incubate.hapi.model import Model, Input, set_device +from paddle.incubate.hapi.loss import CrossEntropy +from paddle.incubate.hapi.metrics import Accuracy +from paddle.incubate.hapi.datasets import MNIST +from paddle.incubate.hapi.vision.models import LeNet +from paddle.incubate.hapi.distributed import DistributedBatchSampler, prepare_distributed_context + + +class LeNetDygraph(fluid.dygraph.Layer): + def __init__(self, num_classes=10, classifier_activation='softmax'): + super(LeNetDygraph, self).__init__() + self.num_classes = num_classes + self.features = Sequential( + Conv2D( + 1, 6, 3, stride=1, padding=1), + Pool2D(2, 'max', 2), + Conv2D( + 6, 16, 5, stride=1, padding=0), + Pool2D(2, 'max', 2)) + + if num_classes > 0: + self.fc = Sequential( + Linear(400, 120), + Linear(120, 84), + Linear( + 84, 10, act=classifier_activation)) + + def forward(self, inputs): + x = self.features(inputs) + + if self.num_classes > 0: + x = fluid.layers.flatten(x, 1) + x = self.fc(x) + return x + + +class MnistDataset(MNIST): + def __init__(self, mode, return_label=True, sample_num=None): + super(MnistDataset, self).__init__(mode=mode) + self.return_label = return_label + if sample_num: + self.images = self.images[:sample_num] + self.labels = self.labels[:sample_num] + + def __getitem__(self, idx): + img, label = self.images[idx], self.labels[idx] + img = np.reshape(img, [1, 28, 28]) + if self.return_label: + return img, np.array(self.labels[idx]).astype('int64') + return img, + + def __len__(self): + return len(self.images) + + +def compute_acc(pred, label): + pred = np.argmax(pred, -1) + label = np.array(label) + 
correct = pred[:, np.newaxis] == label + return np.sum(correct) / correct.shape[0] + + +def dynamic_train(model, dataloader): + optim = fluid.optimizer.Adam( + learning_rate=0.001, parameter_list=model.parameters()) + model.train() + for inputs, labels in dataloader: + outputs = model(inputs) + loss = fluid.layers.cross_entropy(outputs, labels) + avg_loss = fluid.layers.reduce_sum(loss) + avg_loss.backward() + optim.minimize(avg_loss) + model.clear_gradients() + + +def dynamic_evaluate(model, dataloader): + with fluid.dygraph.no_grad(): + model.eval() + cnt = 0 + for inputs, labels in dataloader: + outputs = model(inputs) + + cnt += (np.argmax(outputs.numpy(), -1)[:, np.newaxis] == + labels.numpy()).astype('int').sum() + + return cnt / len(dataloader.dataset) + + +@unittest.skipIf(not fluid.is_compiled_with_cuda(), + 'CPU testing is not supported') +class TestModel(unittest.TestCase): + @classmethod + def setUpClass(cls): + if not fluid.is_compiled_with_cuda(): + self.skipTest('module not tested when ONLY_CPU compling') + cls.device = set_device('gpu') + fluid.enable_dygraph(cls.device) + + sp_num = 1280 + cls.train_dataset = MnistDataset(mode='train', sample_num=sp_num) + cls.val_dataset = MnistDataset(mode='test', sample_num=sp_num) + cls.test_dataset = MnistDataset( + mode='test', return_label=False, sample_num=sp_num) + + cls.train_loader = fluid.io.DataLoader( + cls.train_dataset, places=cls.device, batch_size=64) + cls.val_loader = fluid.io.DataLoader( + cls.val_dataset, places=cls.device, batch_size=64) + cls.test_loader = fluid.io.DataLoader( + cls.test_dataset, places=cls.device, batch_size=64) + + seed = 333 + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + + dy_lenet = LeNetDygraph() + cls.init_param = dy_lenet.state_dict() + dynamic_train(dy_lenet, cls.train_loader) + + cls.acc1 = dynamic_evaluate(dy_lenet, cls.val_loader) + + cls.inputs = [Input([-1, 1, 28, 28], 'float32', name='image')] + 
# Methods of the enclosing test case (its class header precedes this chunk).

@classmethod
def tearDownClass(cls):
    # Remove the temporary directory referenced by cls.save_dir.
    shutil.rmtree(cls.save_dir)


def test_fit_dygraph(self):
    self.fit(True)


def test_fit_static(self):
    self.fit(False)


def test_evaluate_dygraph(self):
    self.evaluate(True)


def test_evaluate_static(self):
    self.evaluate(False)


def test_predict_dygraph(self):
    self.predict(True)


def test_predict_static(self):
    self.predict(False)


def test_prepare_context(self):
    # Passes as long as the call does not raise.
    prepare_distributed_context()


def fit(self, dynamic):
    """Train LeNet from datasets, check accuracy, then train from loaders.

    Args:
        dynamic (bool): when True the scenario runs with dygraph enabled
            on ``self.device``; otherwise dygraph is left disabled.
    """
    if dynamic:
        fluid.enable_dygraph(self.device)
    try:
        seed = 333
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed

        model = LeNet()
        optim_new = fluid.optimizer.Adam(
            learning_rate=0.001, parameter_list=model.parameters())
        model.prepare(
            optim_new,
            loss_function=CrossEntropy(average=False),
            metrics=Accuracy(),
            inputs=self.inputs,
            labels=self.labels)
        model.fit(self.train_dataset, batch_size=64, shuffle=False)

        result = model.evaluate(self.val_dataset, batch_size=64)
        np.testing.assert_allclose(result['acc'], self.acc1)

        # Second pass: feed the same datasets through explicit loaders.
        train_sampler = DistributedBatchSampler(
            self.train_dataset, batch_size=64, shuffle=False)
        val_sampler = DistributedBatchSampler(
            self.val_dataset, batch_size=64, shuffle=False)

        train_loader = fluid.io.DataLoader(
            self.train_dataset,
            batch_sampler=train_sampler,
            places=self.device,
            return_list=True)
        val_loader = fluid.io.DataLoader(
            self.val_dataset,
            batch_sampler=val_sampler,
            places=self.device,
            return_list=True)

        model.fit(train_loader, val_loader)
    finally:
        # Always restore the mode so a failed assertion cannot leave
        # dygraph enabled for the tests that run afterwards.
        if dynamic:
            fluid.disable_dygraph()


def evaluate(self, dynamic):
    """Load the saved weights and evaluate from a dataset and a loader.

    Args:
        dynamic (bool): when True the scenario runs with dygraph enabled
            on ``self.device``; otherwise dygraph is left disabled.
    """
    if dynamic:
        fluid.enable_dygraph(self.device)
    try:
        model = LeNet()
        model.prepare(
            metrics=Accuracy(), inputs=self.inputs, labels=self.labels)
        model.load(self.weight_path)
        result = model.evaluate(self.val_dataset, batch_size=64)
        np.testing.assert_allclose(result['acc'], self.acc1)

        sampler = DistributedBatchSampler(
            self.val_dataset, batch_size=64, shuffle=False)
        val_loader = fluid.io.DataLoader(
            self.val_dataset,
            batch_sampler=sampler,
            places=self.device,
            return_list=True)

        model.evaluate(val_loader)
    finally:
        if dynamic:
            fluid.disable_dygraph()


def predict(self, dynamic):
    """Load the saved weights, predict on the test set and check accuracy.

    Args:
        dynamic (bool): when True the scenario runs with dygraph enabled
            on ``self.device``; otherwise dygraph is left disabled.
    """
    if dynamic:
        fluid.enable_dygraph(self.device)
    try:
        model = LeNet()
        model.prepare(inputs=self.inputs)
        model.load(self.weight_path)
        output = model.predict(
            self.test_dataset, batch_size=64, stack_outputs=True)
        # One stacked prediction row per test sample.
        np.testing.assert_equal(output[0].shape[0], len(self.test_dataset))

        acc = compute_acc(output[0], self.val_dataset.labels)
        np.testing.assert_allclose(acc, self.acc1)

        sampler = DistributedBatchSampler(
            self.test_dataset, batch_size=64, shuffle=False)
        test_loader = fluid.io.DataLoader(
            self.test_dataset,
            batch_sampler=sampler,
            places=self.device,
            return_list=True)

        # NOTE(review): the loader pass calls evaluate() rather than
        # predict(); kept as in the original -- confirm this is intended.
        model.evaluate(test_loader)
    finally:
        if dynamic:
            fluid.disable_dygraph()
class MyModel(Model):
    """Minimal model: a single 20 -> 10 ``Linear`` layer with softmax."""

    def __init__(self):
        super(MyModel, self).__init__()
        self._fc = Linear(20, 10, act='softmax')

    def forward(self, x):
        return self._fc(x)


class TestModelFunction(unittest.TestCase):
    """Batch-level, save/load and export checks built on ``MyModel``/``LeNet``."""

    def set_seed(self, seed=1024):
        # Pin the random seed of the default startup and main programs.
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed

    def _prepare_for_training(self):
        # Build a MyModel prepared with SGD and a summed cross-entropy loss.
        # Must be called after the desired dygraph/static mode is selected.
        model = MyModel()
        inputs = [Input([None, 20], 'float32', name='x')]
        labels = [Input([None, 1], 'int64', name='label')]
        optim = fluid.optimizer.SGD(learning_rate=0.001,
                                    parameter_list=model.parameters())
        model.prepare(
            inputs=inputs,
            optimizer=optim,
            loss_function=CrossEntropy(average=False),
            labels=labels)
        return model

    def test_train_batch(self, dynamic=True):
        # ``dynamic`` is kept for signature compatibility only; the loop
        # below rebinds it and exercises both modes.
        dim = 20
        data = np.random.random(size=(4, dim)).astype(np.float32)
        label = np.random.randint(0, 10, size=(4, 1)).astype(np.int64)

        def get_expect():
            # Reference loss from a hand-written dygraph step on CPU.
            fluid.enable_dygraph(fluid.CPUPlace())
            try:
                self.set_seed()
                m = MyModel()
                optim = fluid.optimizer.SGD(learning_rate=0.001,
                                            parameter_list=m.parameters())
                m.train()
                output = m(to_variable(data))
                target = to_variable(label)
                loss = fluid.layers.cross_entropy(output, target)
                total_loss = fluid.layers.reduce_sum(loss)
                total_loss.backward()
                optim.minimize(total_loss)
                m.clear_gradients()
            finally:
                fluid.disable_dygraph()
            return total_loss.numpy()

        ref = get_expect()
        for dynamic in [True, False]:
            device = set_device('cpu')
            if dynamic:
                fluid.enable_dygraph(device)
            try:
                self.set_seed()
                model = MyModel()

                optim2 = fluid.optimizer.SGD(
                    learning_rate=0.001, parameter_list=model.parameters())

                inputs = [Input([None, dim], 'float32', name='x')]
                labels = [Input([None, 1], 'int64', name='label')]
                model.prepare(
                    optim2,
                    loss_function=CrossEntropy(average=False),
                    inputs=inputs,
                    labels=labels,
                    device=device)
                loss, = model.train_batch([data], [label])

                np.testing.assert_allclose(loss.flatten(), ref.flatten())
            finally:
                # Restore static mode even when the assertion fails.
                if dynamic:
                    fluid.disable_dygraph()

    def test_test_batch(self, dynamic=True):
        # ``dynamic`` is kept for signature compatibility only (see above).
        dim = 20
        data = np.random.random(size=(4, dim)).astype(np.float32)

        def get_expect():
            # Reference output from a plain dygraph forward pass on CPU.
            fluid.enable_dygraph(fluid.CPUPlace())
            try:
                self.set_seed()
                m = MyModel()
                m.eval()
                output = m(to_variable(data))
            finally:
                fluid.disable_dygraph()
            return output.numpy()

        ref = get_expect()
        for dynamic in [True, False]:
            device = set_device('cpu')
            if dynamic:
                fluid.enable_dygraph(device)
            try:
                self.set_seed()
                model = MyModel()
                inputs = [Input([None, dim], 'float32', name='x')]
                model.prepare(inputs=inputs, device=device)
                out, = model.test_batch([data])

                np.testing.assert_allclose(out, ref)
            finally:
                if dynamic:
                    fluid.disable_dygraph()

    def test_save_load(self):
        for dynamic in [True, False]:
            device = set_device('cpu')
            # A fresh directory per mode: the directory is removed at the
            # end of every iteration, so it must not be shared across them.
            path = tempfile.mkdtemp()
            if dynamic:
                fluid.enable_dygraph(device)
            try:
                model = self._prepare_for_training()
                weights = os.path.join(path, 'test')
                model.save(weights)
                model.load(weights)
            finally:
                if dynamic:
                    fluid.disable_dygraph()
                shutil.rmtree(path)

    def test_dynamic_save_static_load(self):
        path = tempfile.mkdtemp()
        try:
            weights = os.path.join(path, 'test')

            # Save under dygraph ...
            device = set_device('cpu')
            fluid.enable_dygraph(device)
            try:
                self._prepare_for_training().save(weights)
            finally:
                fluid.disable_dygraph()

            # ... and load the same file with dygraph disabled.
            self._prepare_for_training().load(weights)
        finally:
            shutil.rmtree(path)

    def test_static_save_dynamic_load(self):
        path = tempfile.mkdtemp()
        try:
            weights = os.path.join(path, 'test')

            # Save with dygraph disabled ...
            self._prepare_for_training().save(weights)

            # ... and load the same file under dygraph.
            device = set_device('cpu')
            fluid.enable_dygraph(device)
            try:
                self._prepare_for_training().load(weights)
            finally:
                fluid.disable_dygraph()
        finally:
            shutil.rmtree(path)

    def test_parameters(self):
        for dynamic in [True, False]:
            device = set_device('cpu')
            if dynamic:
                fluid.enable_dygraph(device)
            try:
                model = MyModel()
                inputs = [Input([None, 20], 'float32', name='x')]
                model.prepare(inputs=inputs)
                params = model.parameters()
                # First parameter is checked against the 20 x 10 layer shape.
                self.assertEqual(params[0].shape[0], 20)
                self.assertEqual(params[0].shape[1], 10)
            finally:
                if dynamic:
                    fluid.disable_dygraph()

    def test_export_deploy_model(self):
        model = LeNet()
        inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
        model.prepare(inputs=inputs)
        # mkdtemp() already creates the directory; no existence check needed.
        save_dir = tempfile.mkdtemp()
        try:
            tensor_img = np.array(
                np.random.random((1, 1, 28, 28)), dtype=np.float32)
            ori_results = model.test_batch(tensor_img)

            model.save_inference_model(save_dir)

            place = (fluid.CUDAPlace(0)
                     if fluid.is_compiled_with_cuda() else fluid.CPUPlace())
            exe = fluid.Executor(place)
            inference_program, feed_target_names, fetch_targets = (
                fluid.io.load_inference_model(
                    dirname=save_dir, executor=exe))

            results = exe.run(inference_program,
                              feed={feed_target_names[0]: tensor_img},
                              fetch_list=fetch_targets)

            # The reloaded inference program must reproduce test_batch().
            np.testing.assert_allclose(results, ori_results)
        finally:
            shutil.rmtree(save_dir)


if __name__ == '__main__':
    unittest.main()
class TestProgressBar(unittest.TestCase):
    """Drives ``ProgressBar`` through its update/start paths."""

    def prog_bar(self, num, epoch, width, verbose=1):
        # NOTE(review): loop nesting was reconstructed from a
        # whitespace-collapsed source -- confirm against the real file.
        # ``width`` is accepted but not used here.
        for _ in range(epoch):
            bar = ProgressBar(num, verbose=verbose)
            loss_entry = ['loss', 50.341673]
            acc_entry = ['acc', 0.00256]
            values = [loss_entry, acc_entry]
            for step in range(1, num + 1):
                loss_entry[1] -= random.random() * 0.1
                acc_entry[1] += random.random() * 0.1
                if step % 10 == 0:
                    bar.update(step, values)
                time.sleep(0.002)
            bar.update(step, values)

        # Feed one value of each supported kind through update().
        for loss in (1, 'INF', 1e-4, np.array([1.]), np.array([1e-4])):
            bar.update(1, [['loss', loss]])
        bar.start()

        bar.update(0, values)
        bar._dynamic_display = False
        bar.update(1e4, values)

        # Exercise update() with an unknown total and with a tiny total.
        bar._num = None
        bar.update(0, values)
        bar._num = 1
        bar.update(1 + 1e-4, values)

    def test1(self):
        self.prog_bar(50, 1, 30)

    def test2(self):
        self.prog_bar(50, 2, 30)

    def test4(self):
        self.prog_bar(50, 2, 30, verbose=2)

    def test_errors(self):
        with self.assertRaises(TypeError):
            ProgressBar(-1)


if __name__ == '__main__':
    unittest.main()
def to_list(value):
    """Return ``value`` as a list.

    ``None`` is passed through unchanged, lists and tuples are copied into
    a new list, and any other object is wrapped in a one-element list.
    """
    if value is None:
        return None
    return list(value) if isinstance(value, (list, tuple)) else [value]


def to_numpy(var):
    """Convert a ``Variable`` or ``VarBase`` into a numpy array.

    A ``VarBase`` is converted through its own ``numpy()`` method; any other
    accepted variable is looked up by name in the global scope and its
    tensor is copied into an array.
    """
    assert isinstance(var, (Variable, fluid.core.VarBase)), "not a variable"
    if not isinstance(var, fluid.core.VarBase):
        tensor = global_scope().find_var(var.name).get_tensor()
        return np.array(tensor)
    return var.numpy()


def flatten_list(l):
    """Flatten a list of lists by one level.

    Returns:
        tuple: ``(flat, sizes)`` where ``flat`` is the concatenation of the
        sub-lists and ``sizes`` holds the length of each sub-list, in order.
    """
    assert isinstance(l, list), "not a list"
    flat, sizes = [], []
    for sub in l:
        assert isinstance(sub, list), "sub content not a list"
        sizes.append(len(sub))
        flat.extend(sub)
    return flat, sizes


def restore_flatten_list(l, splits):
    """Split ``l`` back into consecutive chunks of the lengths in ``splits``.

    This is the inverse of ``flatten_list``; items left over after the last
    chunk are not returned.
    """
    chunks = []
    remaining = l
    for size in splits:
        assert len(remaining) >= size, "list length invalid"
        chunks.append(remaining[:size])
        remaining = remaining[size:]
    return chunks


def extract_args(func):
    """Return the list of positional argument names of ``func``."""
    # Prefer getfullargspec and fall back to getargspec only where the
    # former is not available.
    spec_of = getattr(inspect, 'getfullargspec', None)
    if spec_of is None:
        spec_of = inspect.getargspec
    return spec_of(func)[0]
b/python/paddle/incubate/hapi/vision/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import models +from .models import * + +__all__ = models.__all__ diff --git a/python/paddle/incubate/hapi/vision/models/__init__.py b/python/paddle/incubate/hapi/vision/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ed457798cba948553a46472273fc6fd0f6703768 --- /dev/null +++ b/python/paddle/incubate/hapi/vision/models/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from . 
class LeNet(Model):
    """LeNet model from
    "Gradient-based learning applied to document recognition"
    (LeCun Y, Bottou L, Bengio Y, et al., Proceedings of the IEEE, 1998,
    86(11): 2278-2324).

    Args:
        num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer
                            will not be defined. Default: 10.
        classifier_activation (str): activation for the last fc layer. Default: 'softmax'.

    Examples:
        .. code-block:: python

            from paddle.incubate.hapi.vision.models import LeNet

            model = LeNet()
    """

    def __init__(self, num_classes=10, classifier_activation='softmax'):
        super(LeNet, self).__init__()
        self.num_classes = num_classes
        self.features = Sequential(
            Conv2D(
                1, 6, 3, stride=1, padding=1),
            Pool2D(2, 'max', 2),
            Conv2D(
                6, 16, 5, stride=1, padding=0),
            Pool2D(2, 'max', 2))

        if num_classes > 0:
            # 400 input features = 16 channels * 5 * 5, which assumes
            # 1x28x28 inputs -- TODO confirm for other input sizes.
            self.fc = Sequential(
                Linear(400, 120),
                Linear(120, 84),
                # Size the classifier from num_classes rather than a fixed
                # 10 so the argument actually controls the output dim.
                Linear(
                    84, num_classes, act=classifier_activation))

    def forward(self, inputs):
        """Run the feature extractor and, if defined, the classifier head."""
        x = self.features(inputs)

        # Without a classifier (num_classes <= 0) the raw feature maps are
        # returned unflattened.
        if self.num_classes > 0:
            x = fluid.layers.flatten(x, 1)
            x = self.fc(x)
        return x
"Callback.on_eval_batch_end", + "Callback.on_test_batch_begin", + "Callback.on_test_batch_end" ], "wlist_no_op_pass":[ "gelu",