未验证 提交 43625bda 编写于 作者: Q qingqing01 提交者: GitHub

Add a high-level API with training and inference into Paddle. (#24293)

* Merge hapi into Paddle

Hapi is a high level API for training and inference.
The main modules include Model, Loss, Metrics, Dataset.
Also includes common modules and models in NLP and computer vision, such as BERT, ResNet.

These modules are developed by:
0YuanZhang0, guoshengCS, heavengate, LielinJiang, qingqing01, xyzhou-puck, huangjun12, wangxiao1021, zhangyang.
上级 4af3ec0f
......@@ -787,6 +787,15 @@ set +x
multiple_card_tests="$multiple_card_tests|^$testcase$"
fi
else
if [[ "${#single_card_tests}" -gt 3000 ]];then
if [[ "$single_card_tests_1" == "" ]]; then
single_card_tests_1="^$testcase$"
else
single_card_tests_1="$single_card_tests_1|^$testcase$"
fi
continue
fi
if [[ "$single_card_tests" == "" ]]; then
single_card_tests="^$testcase$"
else
......@@ -800,6 +809,7 @@ set +x
done <<< "$test_cases";
card_test "$single_card_tests" 1 # run cases with single GPU
card_test "$single_card_tests_1" 1 # run cases with single GPU
card_test "$multiple_card_tests" 2 # run cases with two GPUs
card_test "$exclusive_tests" # run cases exclusively, in this cases would be run with 4/8 GPUs
if [[ "$EXIT_CODE" != "0" ]]; then
......
......@@ -96,6 +96,7 @@ if (WITH_TESTING)
add_subdirectory(paddle/fluid/tests)
add_subdirectory(paddle/fluid/contrib/tests)
add_subdirectory(paddle/fluid/contrib/slim/tests)
add_subdirectory(paddle/incubate/hapi/tests)
endif()
install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR}
DESTINATION opt/paddle/share/wheels
......
......@@ -202,6 +202,9 @@ from .tensor.stat import var #DEFINE_ALIAS
# from .tensor.tensor import Tensor #DEFINE_ALIAS
# from .tensor.tensor import LoDTensor #DEFINE_ALIAS
# from .tensor.tensor import LoDTensorArray #DEFINE_ALIAS
from . import incubate
from .incubate import hapi
from .fluid.dygraph.base import enable_dygraph #DEFINE_ALIAS
from .fluid.dygraph.base import disable_dygraph #DEFINE_ALIAS
from .fluid.framework import in_dygraph_mode #DEFINE_ALIAS
......
......@@ -11,3 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import hapi
# Start from an empty export list and extend it with everything that the
# hapi sub-package declares public in its own ``__all__``.
__all__ = []
__all__ += hapi.__all__
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Import every hapi sub-module so it is reachable as an attribute of this
# package.
from . import logger
from . import progressbar
from . import callbacks
from . import download
from . import model
from . import metrics
from . import loss
from . import datasets
from . import distributed
from . import vision
# Import-time side effect: logger.setup_logger() runs as soon as the package
# is imported.
logger.setup_logger()
# Sub-modules exported by name. `logger`, `progressbar` and `model` are not
# listed here; the public names of `model` are appended below instead.
__all__ = [
'callbacks',
'datasets',
'distributed',
'download',
'metrics',
'loss',
'vision',
]
# Re-export the names that the `model` module declares in its own __all__.
__all__ += model.__all__
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.fluid.dygraph.parallel import ParallelEnv
from .progressbar import ProgressBar
__all__ = ['Callback', 'ProgBarLogger', 'ModelCheckpoint']
def config_callbacks(callbacks=None,
                     model=None,
                     batch_size=None,
                     epochs=None,
                     steps=None,
                     log_freq=2,
                     verbose=2,
                     save_freq=1,
                     save_dir=None,
                     metrics=None,
                     mode='train'):
    """Assemble and configure the ``CallbackList`` used for one run.

    A ``ProgBarLogger`` is prepended when none is supplied and ``verbose`` is
    truthy; a ``ModelCheckpoint`` is appended when none is supplied. The
    resulting list then receives ``model`` and a ``params`` dict.

    Args:
        callbacks (Callback|list|tuple|None): A single callback, a sequence of
            callbacks, or None for no user callbacks.
        model: Object handed to every callback through ``set_model``.
        batch_size: Stored under ``params['batch_size']``.
        epochs: Stored under ``params['epochs']``.
        steps: Stored under ``params['steps']``.
        log_freq (int): Forwarded to the automatically created
            ``ProgBarLogger``.
        verbose (int): Forwarded to the automatically created
            ``ProgBarLogger``; a falsy value suppresses its creation.
        save_freq (int): Forwarded to the automatically created
            ``ModelCheckpoint``.
        save_dir (str|None): Forwarded to the automatically created
            ``ModelCheckpoint``.
        metrics (list|None): Stored under ``params['metrics']``; replaced by
            an empty list when ``mode`` is ``'test'`` or when falsy.
        mode (str): Run mode; only ``'test'`` changes the behaviour here.

    Returns:
        CallbackList: The configured callback container.
    """
    cbks = callbacks or []
    # Always work on a fresh list: a tuple would otherwise break the
    # list concatenations below with a TypeError, and copying guarantees the
    # caller's sequence is never modified.
    cbks = list(cbks) if isinstance(cbks, (list, tuple)) else [cbks]

    if verbose and not any(isinstance(k, ProgBarLogger) for k in cbks):
        cbks = [ProgBarLogger(log_freq, verbose=verbose)] + cbks

    if not any(isinstance(k, ModelCheckpoint) for k in cbks):
        cbks = cbks + [ModelCheckpoint(save_freq, save_dir)]

    cbk_list = CallbackList(cbks)
    cbk_list.set_model(model)

    # Metrics are not reported in 'test' mode.
    metrics = (metrics or []) if mode != 'test' else []
    params = {
        'batch_size': batch_size,
        'epochs': epochs,
        'steps': steps,
        'verbose': verbose,
        'metrics': metrics,
    }
    cbk_list.set_params(params)
    return cbk_list
class CallbackList(object):
    """Container that forwards every training event to a list of callbacks.

    Args:
        callbacks (iterable|None): Callbacks to manage. ``None`` (the default)
            creates an empty list. The iterable is copied, so later calls to
            ``append`` never modify the caller's object.
    """

    def __init__(self, callbacks=None):
        # The default of None used to crash in the copy; treat it as "empty".
        self.callbacks = list(callbacks) if callbacks is not None else []
        self.params = {}
        self.model = None

    def append(self, callback):
        """Add ``callback`` at the end of the managed list."""
        self.callbacks.append(callback)

    def __iter__(self):
        return iter(self.callbacks)

    def set_params(self, params):
        """Remember ``params`` and pass it to every callback."""
        self.params = params
        for c in self.callbacks:
            c.set_params(params)

    def set_model(self, model):
        """Remember ``model`` and pass it to every callback."""
        self.model = model
        for c in self.callbacks:
            c.set_model(model)

    def _call(self, name, *args):
        """Invoke the method called ``name`` on every callback, in order."""
        for c in self.callbacks:
            func = getattr(c, name)
            func(*args)

    def _check_mode(self, mode):
        """Assert that ``mode`` is one of 'train', 'eval' or 'test'."""
        assert mode in ['train', 'eval', 'test'], \
            'mode should be train, eval or test'

    def on_begin(self, mode, logs=None):
        """Dispatch ``on_<mode>_begin(logs)``."""
        self._check_mode(mode)
        name = 'on_{}_begin'.format(mode)
        self._call(name, logs)

    def on_end(self, mode, logs=None):
        """Dispatch ``on_<mode>_end(logs)``."""
        self._check_mode(mode)
        name = 'on_{}_end'.format(mode)
        self._call(name, logs)

    def on_epoch_begin(self, epoch=None, logs=None):
        """Dispatch ``on_epoch_begin(epoch, logs)``."""
        self._call('on_epoch_begin', epoch, logs)

    def on_epoch_end(self, epoch=None, logs=None):
        """Dispatch ``on_epoch_end(epoch, logs)``."""
        self._call('on_epoch_end', epoch, logs)

    def on_batch_begin(self, mode, step=None, logs=None):
        """Dispatch ``on_<mode>_batch_begin(step, logs)``."""
        self._check_mode(mode)
        name = 'on_{}_batch_begin'.format(mode)
        self._call(name, step, logs)

    def on_batch_end(self, mode, step=None, logs=None):
        """Dispatch ``on_<mode>_batch_end(step, logs)``."""
        self._check_mode(mode)
        name = 'on_{}_batch_end'.format(mode)
        self._call(name, step, logs)
class Callback(object):
    """
    Base class for all callbacks.

    Subclasses override only the hooks they care about; every hook defined
    here does nothing.

    Examples:

        .. code-block:: python

            from paddle.incubate.hapi.callbacks import Callback

            # build a simple model checkpoint callback
            class ModelCheckpoint(Callback):
                def __init__(self, save_freq=1, save_dir=None):
                    self.save_freq = save_freq
                    self.save_dir = save_dir

                def on_epoch_end(self, epoch, logs=None):
                    if self.model is not None and epoch % self.save_freq == 0:
                        path = '{}/{}'.format(self.save_dir, epoch)
                        print('save checkpoint at {}'.format(path))
                        self.model.save(path)
    """

    def __init__(self):
        self.params = {}
        self.model = None

    def set_params(self, params):
        """
        Store the run parameters.

        ``params`` is a dict whose keys contain:

        - 'batch_size': an integer, the number of samples per batch.
        - 'epochs': an integer, the number of epochs.
        - 'steps': an integer, the number of steps in one epoch.
        - 'verbose': an integer, 0, 1 or 2
          (0 = silent, 1 = progress bar, 2 = one line per epoch).
        - 'metrics': a list of str with the metric names, including 'loss'
          and the names of hapi.Metric.
        """
        self.params = params

    def set_model(self, model):
        """Store the model; ``model`` is an instance of hapi.Model."""
        self.model = model

    def on_train_begin(self, logs=None):
        """Hook run when training starts.

        Args:
            logs (dict): A dict or None.
        """

    def on_train_end(self, logs=None):
        """Hook run when training finishes.

        Args:
            logs (dict): A dict or None. When passed by hapi.Model its keys
                contain 'loss', the metric names and `batch_size`.
        """

    def on_eval_begin(self, logs=None):
        """Hook run when evaluation starts.

        Args:
            logs (dict): A dict or None. When passed by hapi.Model its keys
                contain 'steps' and 'metrics': `steps` is the total number
                of steps of the validation dataset, `metrics` is a list of
                str including 'loss' and the names of hapi.Metric.
        """

    def on_eval_end(self, logs=None):
        """Hook run when evaluation finishes.

        Args:
            logs (dict): A dict or None. When passed by hapi.Model it
                contains 'loss', metrics and 'batch_size' of the last batch
                of the validation dataset.
        """

    def on_test_begin(self, logs=None):
        """Hook run when prediction starts.

        Args:
            logs (dict): A dict or None.
        """

    def on_test_end(self, logs=None):
        """Hook run when prediction finishes.

        Args:
            logs (dict): A dict or None.
        """

    def on_epoch_begin(self, epoch, logs=None):
        """Hook run when an epoch starts.

        Args:
            epoch (int): The index of the epoch.
            logs (dict): A dict or None. hapi.Model passes None.
        """

    def on_epoch_end(self, epoch, logs=None):
        """Hook run when an epoch finishes.

        Args:
            epoch (int): The index of the epoch.
            logs (dict): A dict or None. When passed by hapi.Model it
                contains 'loss', metrics and 'batch_size' of the last batch.
        """

    def on_train_batch_begin(self, step, logs=None):
        """Hook run before each training batch.

        Args:
            step (int): The index of the step (or iteration).
            logs (dict): A dict or None. hapi.Model passes an empty dict.
        """

    def on_train_batch_end(self, step, logs=None):
        """Hook run after each training batch.

        Args:
            step (int): The index of the step (or iteration).
            logs (dict): A dict or None. When passed by hapi.Model it
                contains 'loss', metrics and 'batch_size' of the current
                batch.
        """

    def on_eval_batch_begin(self, step, logs=None):
        """Hook run before each evaluation batch.

        Args:
            step (int): The index of the step (or iteration).
            logs (dict): A dict or None. hapi.Model passes an empty dict.
        """

    def on_eval_batch_end(self, step, logs=None):
        """Hook run after each evaluation batch.

        Args:
            step (int): The index of the step (or iteration).
            logs (dict): A dict or None. When passed by hapi.Model it
                contains 'loss', metrics and 'batch_size' of the current
                batch.
        """

    def on_test_batch_begin(self, step, logs=None):
        """Hook run before each prediction batch.

        Args:
            step (int): The index of the step (or iteration).
            logs (dict): A dict or None.
        """

    def on_test_batch_end(self, step, logs=None):
        """Hook run after each prediction batch.

        Args:
            step (int): The index of the step (or iteration).
            logs (dict): A dict or None.
        """
class ProgBarLogger(Callback):
    """Callback that reports loss and metric values through a progress bar.

    Args:
        log_freq (int): The frequency, in number of steps, at which logs such
            as `loss` and `metrics` are printed. Default: 1.
        verbose (int): The verbosity mode, should be 0, 1, or 2.
            0 = silent, 1 = progress bar, 2 = one line per epoch. Default: 2.

    Examples:
        .. code-block:: python

            import numpy as np
            from paddle import fluid
            from paddle.incubate.hapi.metrics import Accuracy
            from paddle.incubate.hapi.loss import CrossEntropy
            from paddle.incubate.hapi.datasets import MNIST
            from paddle.incubate.hapi.vision.models import LeNet
            from paddle.incubate.hapi.callbacks import ProgBarLogger
            from paddle.incubate.hapi.model import Input, set_device

            inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
            labels = [Input([None, 1], 'int64', name='label')]

            train_dataset = MNIST(mode='train')

            model = LeNet()

            optim = fluid.optimizer.Adam(0.001)
            model.prepare(optimizer=optim,
                          loss_function=CrossEntropy(),
                          metrics=Accuracy(),
                          inputs=inputs,
                          labels=labels)

            callback = ProgBarLogger(log_freq=10)
            model.fit(train_dataset, batch_size=64, callbacks=callback)
    """

    def __init__(self, log_freq=1, verbose=2):
        # Run the base initializer so `model` and `params` always exist,
        # even before set_model / set_params are called.
        super(ProgBarLogger, self).__init__()
        self.epochs = None
        self.steps = None
        self.progbar = None
        self.verbose = verbose
        self.log_freq = log_freq

    def _is_print(self):
        """Return a truthy value only if verbose is set and local rank is 0."""
        return self.verbose and ParallelEnv().local_rank == 0

    def on_train_begin(self, logs=None):
        """Read the epoch count and metric names from ``self.params``."""
        self.epochs = self.params['epochs']
        assert self.epochs
        self.train_metrics = self.params['metrics']
        assert self.train_metrics

    def on_epoch_begin(self, epoch=None, logs=None):
        """Reset the step counter and create a progress bar for the epoch."""
        self.steps = self.params['steps']
        self.epoch = epoch
        self.train_step = 0
        if self.epochs and self._is_print():
            print('Epoch %d/%d' % (epoch + 1, self.epochs))
        self.train_progbar = ProgressBar(num=self.steps, verbose=self.verbose)

    def _updates(self, logs, mode):
        """Push the values of the known metrics found in ``logs`` to the bar.

        ``mode`` ('train', 'eval' or 'test') selects the ``<mode>_metrics``,
        ``<mode>_progbar`` and ``<mode>_step`` attributes to use.
        """
        metrics = getattr(self, '%s_metrics' % (mode))
        progbar = getattr(self, '%s_progbar' % (mode))
        steps = getattr(self, '%s_step' % (mode))
        values = [(k, logs[k]) for k in metrics if k in logs]
        progbar.update(steps, values)

    def on_train_batch_end(self, step, logs=None):
        """Count the step and print every ``log_freq`` steps."""
        logs = logs or {}
        self.train_step += 1

        if self._is_print() and self.train_step % self.log_freq == 0:
            # When the step count is known, the final step is left to
            # on_epoch_end, which prints it unconditionally.
            if self.steps is None or self.train_step < self.steps:
                self._updates(logs, 'train')

    def on_epoch_end(self, epoch, logs=None):
        """Print the final state of the epoch when the step count is known."""
        logs = logs or {}
        if self._is_print() and (self.steps is not None):
            self._updates(logs, 'train')

    def on_eval_begin(self, logs=None):
        """Reset the evaluation counters and create the evaluation bar."""
        # `logs` defaults to None; treat it as an empty dict so that the
        # `.get` calls below cannot raise AttributeError.
        logs = logs or {}
        self.eval_steps = logs.get('steps', None)
        self.eval_metrics = logs.get('metrics', [])
        self.eval_step = 0
        self.evaled_samples = 0
        self.eval_progbar = ProgressBar(
            num=self.eval_steps, verbose=self.verbose)
        if self._is_print():
            print('Eval begin...')

    def on_eval_batch_end(self, step, logs=None):
        """Count the step and its samples; print every ``log_freq`` steps."""
        logs = logs or {}
        self.eval_step += 1
        samples = logs.get('batch_size', 1)
        self.evaled_samples += samples

        if self._is_print() and self.eval_step % self.log_freq == 0:
            if self.eval_steps is None or self.eval_step < self.eval_steps:
                self._updates(logs, 'eval')

    def on_test_begin(self, logs=None):
        """Reset the prediction counters and create the prediction bar."""
        # Same None-guard as on_eval_begin.
        logs = logs or {}
        self.test_steps = logs.get('steps', None)
        self.test_metrics = logs.get('metrics', [])
        self.test_step = 0
        self.tested_samples = 0
        self.test_progbar = ProgressBar(
            num=self.test_steps, verbose=self.verbose)
        if self._is_print():
            print('Predict begin...')

    def on_test_batch_end(self, step, logs=None):
        """Count the step and its samples; print every ``log_freq`` steps."""
        logs = logs or {}
        self.test_step += 1
        samples = logs.get('batch_size', 1)
        self.tested_samples += samples

        if self.test_step % self.log_freq == 0 and self._is_print():
            if self.test_steps is None or self.test_step < self.test_steps:
                self._updates(logs, 'test')

    def on_eval_end(self, logs=None):
        """Print the final evaluation state and the number of samples seen."""
        logs = logs or {}
        if self._is_print() and (self.eval_steps is not None):
            self._updates(logs, 'eval')
            print('Eval samples: %d' % (self.evaled_samples))

    def on_test_end(self, logs=None):
        """Print the final prediction state and the number of samples seen."""
        logs = logs or {}
        if self._is_print():
            if self.test_step % self.log_freq != 0 or self.verbose == 1:
                self._updates(logs, 'test')
            print('Predict samples: %d' % (self.tested_samples))
class ModelCheckpoint(Callback):
    """Callback that saves model checkpoints during training.

    Args:
        save_freq(int): The frequency, in number of epochs, at which the
            model checkpoint is saved. Default: 1.
        save_dir(str|None): The directory to save checkpoints to during
            training. If None, no checkpoint is saved. Default: None.

    Examples:
        .. code-block:: python

            import numpy as np
            from paddle import fluid
            from paddle.incubate.hapi.metrics import Accuracy
            from paddle.incubate.hapi.loss import CrossEntropy
            from paddle.incubate.hapi.datasets import MNIST
            from paddle.incubate.hapi.vision.models import LeNet
            from paddle.incubate.hapi.callbacks import ModelCheckpoint
            from paddle.incubate.hapi.model import Input, set_device

            inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
            labels = [Input([None, 1], 'int64', name='label')]

            train_dataset = MNIST(mode='train')

            model = LeNet()

            optim = fluid.optimizer.Adam(0.001)
            model.prepare(optimizer=optim,
                          loss_function=CrossEntropy(),
                          metrics=Accuracy(),
                          inputs=inputs,
                          labels=labels)

            callback = ModelCheckpoint(save_dir='./temp')
            model.fit(train_dataset, batch_size=64, callbacks=callback)
    """

    def __init__(self, save_freq=1, save_dir=None):
        # Run the base initializer so `model` and `params` always exist;
        # without it `_is_save` raises AttributeError before set_model.
        super(ModelCheckpoint, self).__init__()
        self.save_freq = save_freq
        self.save_dir = save_dir
        self.epoch = None

    def on_epoch_begin(self, epoch=None, logs=None):
        """Remember the index of the epoch that is starting."""
        self.epoch = epoch

    def _is_save(self):
        """Return a truthy value only if a model and a directory are set and
        the local rank is 0."""
        return self.model and self.save_dir and ParallelEnv().local_rank == 0

    def on_epoch_end(self, epoch, logs=None):
        """Save to ``<save_dir>/<epoch>`` every ``save_freq`` epochs."""
        if not self._is_save():
            return
        # Use the epoch recorded by on_epoch_begin; fall back to the argument
        # when that hook has not run (or recorded None).
        current = self.epoch if self.epoch is not None else epoch
        if current % self.save_freq == 0:
            path = '{}/{}'.format(self.save_dir, epoch)
            print('save checkpoint at {}'.format(path))
            self.model.save(path)

    def on_train_end(self, logs=None):
        """Save the final state to ``<save_dir>/final``."""
        if self._is_save():
            path = '{}/final'.format(self.save_dir)
            print('save checkpoint at {}'.format(path))
            self.model.save(path)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Import the sub-modules themselves so their __all__ lists can be read below.
from . import folder
from . import mnist
from . import flowers
# Pull the public names of each sub-module into this package's namespace.
from .folder import *
from .mnist import *
from .flowers import *
# The package exports exactly what the three sub-modules export.
__all__ = folder.__all__ \
+ mnist.__all__ \
+ flowers.__all__
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import io
import tarfile
import numpy as np
import scipy.io as scio
from PIL import Image
from paddle.io import Dataset
from .utils import _check_exists_and_download
__all__ = ["Flowers"]
# Download locations of the image archive, the label file and the
# subset-index file.
DATA_URL = 'http://paddlemodels.bj.bcebos.com/flowers/102flowers.tgz'
LABEL_URL = 'http://paddlemodels.bj.bcebos.com/flowers/imagelabels.mat'
SETID_URL = 'http://paddlemodels.bj.bcebos.com/flowers/setid.mat'
# MD5 values passed to the download helper together with the URLs above.
DATA_MD5 = '52808999861908f626f3c1f4e79d11fa'
LABEL_MD5 = 'e0620be6f572b9609742df49c70aed4d'
SETID_MD5 = 'a5357ecc9cb78c4bef273ce3793fc85c'
# In the official 'readme', tstid is the flag of the test data and trnid is
# the flag of the train data. But the test split holds more samples than the
# train split, so the two are deliberately exchanged here.
# Maps the user-facing mode name to the key read from the subset-index file.
MODE_FLAG_MAP = {'train': 'tstid', 'test': 'trnid', 'valid': "valid"}
class Flowers(Dataset):
    """
    Flowers dataset backed by an image archive, a label file and a
    subset-index file.

    Args:
        data_file(str): path to the image archive, can be None if
            :attr:`download` is True. Default None
        label_file(str): path to the label file, can be None if
            :attr:`download` is True. Default None
        setid_file(str): path to the subset index file, can be None if
            :attr:`download` is True. Default None
        mode(str): 'train', 'valid' or 'test' (case-insensitive).
            Default 'train'.
        transform(callable): applied to every image before it is returned;
            skipped when None. Default None
        download(bool): whether to download automatically any of
            :attr:`data_file`/:attr:`label_file`/:attr:`setid_file` that is
            unset. Default True

    Examples:

        .. code-block:: python

            from paddle.incubate.hapi.datasets import Flowers

            flowers = Flowers(mode='test')

            for i in range(len(flowers)):
                sample = flowers[i]
                print(sample[0].shape, sample[1])
    """

    def __init__(self,
                 data_file=None,
                 label_file=None,
                 setid_file=None,
                 mode='train',
                 transform=None,
                 download=True):
        mode_key = mode.lower()
        assert mode_key in ('train', 'valid', 'test'), \
            "mode should be 'train', 'valid' or 'test', but got {}".format(mode)
        self.flag = MODE_FLAG_MAP[mode_key]

        # Each missing file is fetched through the download helper.
        if data_file is None:
            assert download, "data_file not set and auto download disabled"
            data_file = _check_exists_and_download(
                None, DATA_URL, DATA_MD5, 'flowers', download)
        self.data_file = data_file

        if label_file is None:
            assert download, "label_file not set and auto download disabled"
            label_file = _check_exists_and_download(
                None, LABEL_URL, LABEL_MD5, 'flowers', download)
        self.label_file = label_file

        if setid_file is None:
            assert download, "setid_file not set and auto download disabled"
            setid_file = _check_exists_and_download(
                None, SETID_URL, SETID_MD5, 'flowers', download)
        self.setid_file = setid_file

        self.transform = transform

        # Index the archive and load labels / subset indexes.
        self._load_anno()

    def _load_anno(self):
        """Open the archive, index its members by name and load the labels
        and the indexes of the selected subset."""
        # The archive stays open because __getitem__ reads images from it.
        self.data_tar = tarfile.open(self.data_file)
        self.name2mem = {
            member.name: member
            for member in self.data_tar.getmembers()
        }

        self.labels = scio.loadmat(self.label_file)['labels'][0]
        self.indexes = scio.loadmat(self.setid_file)[self.flag][0]

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the ``idx``-th sample of the subset.

        The label is a one-element int64 array.
        """
        index = self.indexes[idx]
        # The label array is addressed with `index - 1`.
        label = np.array([self.labels[index - 1]])

        member = self.name2mem["jpg/image_%05d.jpg" % index]
        raw = self.data_tar.extractfile(member).read()
        image = np.array(Image.open(io.BytesIO(raw)))

        if self.transform is not None:
            image = self.transform(image)

        return image, label.astype('int64')

    def __len__(self):
        """Number of samples in the selected subset."""
        return len(self.indexes)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import cv2
from paddle.io import Dataset
__all__ = ["DatasetFolder", "ImageFolder"]
def has_valid_extension(filename, extensions):
    """Check whether a file name ends with one of the given extensions.

    The comparison lower-cases ``filename`` first, so ``extensions`` should
    be given in lower case.

    Args:
        filename (str): path to a file
        extensions (str|tuple of str|iterable of str): extensions to consider
            (lowercase). Lists and other iterables are accepted as well as
            tuples.

    Returns:
        bool: True if the filename ends with one of given extensions
    """
    name = filename.lower()
    try:
        return name.endswith(extensions)
    except TypeError:
        # str.endswith only accepts a str or a tuple of str; convert any
        # other iterable (e.g. a list) instead of failing.
        return name.endswith(tuple(extensions))
def make_dataset(dir, class_to_idx, extensions, is_valid_file=None):
    """Collect ``(path, class_index)`` pairs for every accepted file.

    Each key of ``class_to_idx`` is treated as a sub-directory of ``dir``;
    keys that are not existing directories are skipped. Sub-directories are
    walked recursively (following links), with classes and file names
    visited in sorted order.

    Args:
        dir (str): Root directory; a leading ``~`` is expanded.
        class_to_idx (dict): Maps class (sub-directory) name to class index.
        extensions (tuple of str|None): Accepted file extensions. When given,
            it takes precedence over ``is_valid_file``.
        is_valid_file (callable|None): Predicate called with a file path;
            used only when ``extensions`` is None.

    Returns:
        list: ``(path, class_index)`` tuples.

    Raises:
        ValueError: If both ``extensions`` and ``is_valid_file`` are None.
    """
    if extensions is not None:

        def is_valid_file(x):
            return has_valid_extension(x, extensions)
    elif is_valid_file is None:
        # Fail early with a clear message instead of the later
        # "'NoneType' object is not callable".
        raise ValueError(
            "Both extensions and is_valid_file are None; "
            "one of them must be given")

    dir = os.path.expanduser(dir)
    images = []
    for target in sorted(class_to_idx):
        d = os.path.join(dir, target)
        if not os.path.isdir(d):
            continue
        for root, _, fnames in sorted(os.walk(d, followlinks=True)):
            for fname in sorted(fnames):
                path = os.path.join(root, fname)
                if is_valid_file(path):
                    images.append((path, class_to_idx[target]))
    return images
class DatasetFolder(Dataset):
    """A generic data loader where the samples are arranged in this way:

        root/class_a/1.ext
        root/class_a/2.ext
        root/class_a/3.ext

        root/class_b/123.ext
        root/class_b/456.ext
        root/class_b/789.ext

    Args:
        root (string): Root directory path.
        loader (callable|optional): A function to load a sample given its
            path. Defaults to ``cv2_loader``.
        extensions (tuple[str]|optional): Allowed extensions. Defaults to
            ``IMG_EXTENSIONS`` when neither ``extensions`` nor
            ``is_valid_file`` is given. Both extensions and is_valid_file
            should not be passed.
        transform (callable|optional): A function/transform that takes in
            a sample and returns a transformed version.
        is_valid_file (callable|optional): A function that takes the path of
            a file and checks whether it is a valid file (used to check for
            corrupt files). Both extensions and is_valid_file should not be
            passed.

    Attributes:
        classes (list): List of the class names.
        class_to_idx (dict): Dict with items (class_name, class_index).
        samples (list): List of (sample path, class_index) tuples
        targets (list): The class_index value for each image in the dataset

    Example:

        .. code-block:: python

            import os
            import cv2
            import tempfile
            import shutil
            import numpy as np
            from paddle.incubate.hapi.datasets import DatasetFolder

            def make_fake_dir():
                data_dir = tempfile.mkdtemp()

                for i in range(2):
                    sub_dir = os.path.join(data_dir, 'class_' + str(i))
                    if not os.path.exists(sub_dir):
                        os.makedirs(sub_dir)
                    for j in range(2):
                        fake_img = (np.random.random((32, 32, 3)) * 255).astype('uint8')
                        cv2.imwrite(os.path.join(sub_dir, str(j) + '.jpg'), fake_img)
                return data_dir

            temp_dir = make_fake_dir()
            data_folder = DatasetFolder(temp_dir)

            for items in data_folder:
                break

            shutil.rmtree(temp_dir)
    """

    def __init__(self,
                 root,
                 loader=None,
                 extensions=None,
                 transform=None,
                 is_valid_file=None):
        self.root = root
        self.transform = transform
        # Only fall back to the default extensions when the caller supplied
        # no filter at all. Defaulting unconditionally made make_dataset
        # ignore a user-supplied is_valid_file.
        if extensions is None and is_valid_file is None:
            extensions = IMG_EXTENSIONS
        classes, class_to_idx = self._find_classes(self.root)
        samples = make_dataset(self.root, class_to_idx, extensions,
                               is_valid_file)
        if len(samples) == 0:
            message = "Found 0 files in subfolders of: " + self.root + "\n"
            if extensions is not None:
                message += "Supported extensions are: " + ",".join(extensions)
            raise RuntimeError(message)

        self.loader = cv2_loader if loader is None else loader
        self.extensions = extensions

        self.classes = classes
        self.class_to_idx = class_to_idx
        self.samples = samples
        self.targets = [s[1] for s in samples]

    def _find_classes(self, dir):
        """
        Finds the class folders in a dataset.

        Args:
            dir (string): Root directory path.

        Returns:
            tuple: (classes, class_to_idx) where classes are the sorted
                names of the sub-directories of (dir), and class_to_idx maps
                each name to its position in that sorted list.
        """
        if sys.version_info >= (3, 5):
            # Faster and available in Python 3.5 and above
            classes = [d.name for d in os.scandir(dir) if d.is_dir()]
        else:
            classes = [
                d for d in os.listdir(dir)
                if os.path.isdir(os.path.join(dir, d))
            ]
        classes.sort()
        class_to_idx = {name: i for i, name in enumerate(classes)}
        return classes, class_to_idx

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (sample, target) where target is class_index of the
                target class.
        """
        path, target = self.samples[index]
        sample = self.loader(path)
        if self.transform is not None:
            sample = self.transform(sample)

        return sample, target

    def __len__(self):
        """Number of samples found under ``root``."""
        return len(self.samples)
# Default file extensions used by DatasetFolder and ImageFolder when the
# caller passes no `extensions`. File names are lower-cased before matching,
# so the entries are lower case.
IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif',
'.tiff', '.webp')
def cv2_loader(path):
    """Default sample loader: read ``path`` with ``cv2.imread`` and return
    whatever that call returns."""
    image = cv2.imread(path)
    return image
class ImageFolder(Dataset):
    """A generic data loader where the samples are arranged in this way:

        root/1.ext
        root/2.ext
        root/sub_dir/3.ext

    Args:
        root (string): Root directory path.
        loader (callable, optional): A function to load a sample given its
            path. Defaults to ``cv2_loader``.
        extensions (tuple[string], optional): Allowed extensions. Defaults
            to ``IMG_EXTENSIONS`` when neither ``extensions`` nor
            ``is_valid_file`` is given. Both extensions and is_valid_file
            should not be passed; if both are, ``extensions`` wins.
        transform (callable, optional): A function/transform that takes in
            a sample and returns a transformed version.
        is_valid_file (callable, optional): A function that takes the path
            of a file and checks whether it is a valid file (used to check
            for corrupt files). Both extensions and is_valid_file should not
            be passed.

    Attributes:
        samples (list): List of sample paths

    Example:

        .. code-block:: python

            import os
            import cv2
            import tempfile
            import shutil
            import numpy as np
            from paddle.incubate.hapi.datasets import ImageFolder

            def make_fake_dir():
                data_dir = tempfile.mkdtemp()

                for i in range(2):
                    sub_dir = os.path.join(data_dir, 'class_' + str(i))
                    if not os.path.exists(sub_dir):
                        os.makedirs(sub_dir)
                    for j in range(2):
                        fake_img = (np.random.random((32, 32, 3)) * 255).astype('uint8')
                        cv2.imwrite(os.path.join(sub_dir, str(j) + '.jpg'), fake_img)
                return data_dir

            temp_dir = make_fake_dir()
            data_folder = ImageFolder(temp_dir)

            for items in data_folder:
                break

            shutil.rmtree(temp_dir)
    """

    def __init__(self,
                 root,
                 loader=None,
                 extensions=None,
                 transform=None,
                 is_valid_file=None):
        self.root = root
        # Only fall back to the default extensions when the caller supplied
        # no filter at all. Defaulting unconditionally caused a
        # user-supplied is_valid_file to be replaced below.
        if extensions is None and is_valid_file is None:
            extensions = IMG_EXTENSIONS

        if extensions is not None:

            def is_valid_file(x):
                return has_valid_extension(x, extensions)

        samples = []
        path = os.path.expanduser(root)
        # `dirpath` rather than `root`, so the constructor argument is not
        # shadowed by the loop variable.
        for dirpath, _, fnames in sorted(os.walk(path, followlinks=True)):
            for fname in sorted(fnames):
                f = os.path.join(dirpath, fname)
                if is_valid_file(f):
                    samples.append(f)

        if len(samples) == 0:
            message = "Found 0 files in subfolders of: " + self.root + "\n"
            if extensions is not None:
                message += "Supported extensions are: " + ",".join(extensions)
            raise RuntimeError(message)

        self.loader = cv2_loader if loader is None else loader
        self.extensions = extensions
        self.samples = samples
        self.transform = transform

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            list: A one-element list ``[sample]``; this dataset has no
                targets.
        """
        path = self.samples[index]
        sample = self.loader(path)
        if self.transform is not None:
            sample = self.transform(sample)
        return [sample]

    def __len__(self):
        """Number of samples found under ``root``."""
        return len(self.samples)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import gzip
import struct
import numpy as np
import paddle.dataset.common
from paddle.io import Dataset
from .utils import _check_exists_and_download
__all__ = ["MNIST"]
# Common prefix of all MNIST download URLs.
URL_PREFIX = 'https://dataset.bj.bcebos.com/mnist/'
# Test split: image and label archives with the MD5 values handed to the
# download helper.
TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz'
TEST_IMAGE_MD5 = '9fb629c4189551a2d022fa330f9573f3'
TEST_LABEL_URL = URL_PREFIX + 't10k-labels-idx1-ubyte.gz'
TEST_LABEL_MD5 = 'ec29112dd5afa0611ce80d1b7f02629c'
# Train split: image and label archives with their MD5 values.
TRAIN_IMAGE_URL = URL_PREFIX + 'train-images-idx3-ubyte.gz'
TRAIN_IMAGE_MD5 = 'f68b3c2dcbeaaa9fbdd348bbdeb94873'
TRAIN_LABEL_URL = URL_PREFIX + 'train-labels-idx1-ubyte.gz'
TRAIN_LABEL_MD5 = 'd53e105ee54ea40749a09fcbcd1e9432'
class MNIST(Dataset):
"""
Implement of MNIST dataset
Args:
image_path(str): path to image file, can be set None if
:attr:`download` is True. Default None
label_path(str): path to label file, can be set None if
:attr:`download` is True. Default None
chw_format(bool): If set True, the output shape is [1, 28, 28],
otherwise, output shape is [1, 784]. Default True.
mode(str): 'train' or 'test' mode. Default 'train'.
download(bool): whether auto download mnist dataset if
:attr:`image_path`/:attr:`label_path` unset. Default
True
Returns:
Dataset: MNIST Dataset.
Examples:
.. code-block:: python
from paddle.incubate.hapi.datasets import MNIST
mnist = MNIST(mode='test')
for i in range(len(mnist)):
sample = mnist[i]
print(sample[0].shape, sample[1])
"""
def __init__(self,
image_path=None,
label_path=None,
chw_format=True,
mode='train',
transform=None,
download=True):
assert mode.lower() in ['train', 'test'], \
"mode should be 'train' or 'test', but got {}".format(mode)
self.mode = mode.lower()
self.chw_format = chw_format
self.image_path = image_path
if self.image_path is None:
assert download, "image_path not set and auto download disabled"
image_url = TRAIN_IMAGE_URL if mode == 'train' else TEST_IMAGE_URL
image_md5 = TRAIN_IMAGE_MD5 if mode == 'train' else TEST_IMAGE_MD5
self.image_path = _check_exists_and_download(
image_path, image_url, image_md5, 'mnist', download)
self.label_path = label_path
if self.label_path is None:
assert download, "label_path not set and auto download disabled"
label_url = TRAIN_LABEL_URL if mode == 'train' else TEST_LABEL_URL
label_md5 = TRAIN_LABEL_MD5 if mode == 'train' else TEST_LABEL_MD5
self.label_path = _check_exists_and_download(
label_path, label_url, label_md5, 'mnist', download)
self.transform = transform
# read dataset into memory
self._parse_dataset()
def _parse_dataset(self, buffer_size=100):
self.images = []
self.labels = []
with gzip.GzipFile(self.image_path, 'rb') as image_file:
img_buf = image_file.read()
with gzip.GzipFile(self.label_path, 'rb') as label_file:
lab_buf = label_file.read()
step_label = 0
offset_img = 0
# read from Big-endian
# get file info from magic byte
# image file : 16B
magic_byte_img = '>IIII'
magic_img, image_num, rows, cols = struct.unpack_from(
magic_byte_img, img_buf, offset_img)
offset_img += struct.calcsize(magic_byte_img)
offset_lab = 0
# label file : 8B
magic_byte_lab = '>II'
magic_lab, label_num = struct.unpack_from(magic_byte_lab,
lab_buf, offset_lab)
offset_lab += struct.calcsize(magic_byte_lab)
while True:
if step_label >= label_num:
break
fmt_label = '>' + str(buffer_size) + 'B'
labels = struct.unpack_from(fmt_label, lab_buf, offset_lab)
offset_lab += struct.calcsize(fmt_label)
step_label += buffer_size
fmt_images = '>' + str(buffer_size * rows * cols) + 'B'
images_temp = struct.unpack_from(fmt_images, img_buf,
offset_img)
images = np.reshape(images_temp, (buffer_size, rows *
cols)).astype('float32')
offset_img += struct.calcsize(fmt_images)
images = images / 255.0
images = images * 2.0
images = images - 1.0
for i in range(buffer_size):
self.images.append(images[i, :])
self.labels.append(
np.array([labels[i]]).astype('int64'))
def __getitem__(self, idx):
    """Return the ``(image, label)`` pair stored at position ``idx``.

    The image is reshaped to ``[1, 28, 28]`` when ``self.chw_format`` is
    truthy, and passed through ``self.transform`` when one is set.
    """
    sample = self.images[idx]
    target = self.labels[idx]
    if self.chw_format:
        sample = np.reshape(sample, [1, 28, 28])
    transform = self.transform
    if transform is None:
        return sample, target
    return transform(sample), target
def __len__(self):
    """Return the number of samples, measured as the number of labels."""
    sample_count = len(self.labels)
    return sample_count
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import paddle.dataset.common
def _check_exists_and_download(path, url, md5, module_name, download=True):
    """Return a local file path, downloading the file when permitted.

    Args:
        path (str|None): candidate local path; returned unchanged when it
            is non-empty and exists on disk.
        url (str): download URL handed to ``paddle.dataset.common.download``.
        md5 (str): checksum handed to ``paddle.dataset.common.download``.
        module_name (str): module name handed to
            ``paddle.dataset.common.download``.
        download (bool): whether downloading is allowed when ``path`` is
            missing.

    Returns:
        ``path`` itself, or whatever ``paddle.dataset.common.download``
        returns.

    Raises:
        ValueError: if ``path`` is unusable and ``download`` is false.
    """
    if path and os.path.exists(path):
        return path
    if not download:
        raise ValueError('{} not exists and auto download disabled'.format(
            path))
    return paddle.dataset.common.download(url, module_name, md5)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import six
import time
import math
import socket
import contextlib
import numpy as np
from paddle import fluid
from paddle.fluid.layers import collective
from paddle.fluid.dygraph.parallel import ParallelEnv, ParallelStrategy
from paddle.io import BatchSampler
# Module-level flag; prepare_distributed_context() sets it to True after it
# has run for a job with at least two ranks.
_parallel_context_initialized = False
# Names exported by a star-import of this module.
__all__ = ['DistributedBatchSampler']
class DistributedBatchSampler(BatchSampler):
    """Batch sampler that gives every rank its own share of the dataset.

    Each process builds its own instance; the rank count and the local
    rank are read from ``ParallelEnv``. The index list is padded (by
    repeating leading indices) so it divides evenly among the ranks, and
    every rank then iterates over mini-batches of its own share.

    .. note::
        Dataset is assumed to be of constant size.

    Args:
        dataset(paddle.io.Dataset): a `paddle.io.Dataset` implementation,
            or any other python object implementing `__len__`, used to
            obtain the number of samples.
        batch_size(int): number of sample indices in one mini-batch.
        shuffle(bool): whether to shuffle the index order before
            generating batch indices. Default False.
        drop_last(bool): whether to drop the last incomplete batch when
            the number of samples is not divisible by the batch size.
            Default False.

    Examples:
        .. code-block:: python

            import numpy as np

            from paddle.incubate.hapi.datasets import MNIST
            from paddle.incubate.hapi.distributed import DistributedBatchSampler

            class MnistDataset(MNIST):
                def __init__(self, mode, return_label=True):
                    super(MnistDataset, self).__init__(mode=mode)
                    self.return_label = return_label

                def __getitem__(self, idx):
                    img = np.reshape(self.images[idx], [1, 28, 28])
                    if self.return_label:
                        return img, np.array(self.labels[idx]).astype('int64')
                    return img,

                def __len__(self):
                    return len(self.images)

            train_dataset = MnistDataset(mode='train')
            dist_train_sampler = DistributedBatchSampler(train_dataset, batch_size=64)

            for batch_indices in dist_train_sampler:
                # do something
                break
    """

    def __init__(self, dataset, batch_size, shuffle=False, drop_last=False):
        self.dataset = dataset

        assert isinstance(batch_size, int) and batch_size > 0, \
            "batch_size should be a positive integer"
        self.batch_size = batch_size
        assert isinstance(shuffle, bool), \
            "shuffle should be a boolean value"
        self.shuffle = shuffle
        assert isinstance(drop_last, bool), \
            "drop_last should be a boolean number"
        self.drop_last = drop_last

        self.nranks = ParallelEnv().nranks
        self.local_rank = ParallelEnv().local_rank
        self.epoch = 0
        # per-rank sample count, rounded up so all ranks get the same amount
        per_rank = math.ceil(len(self.dataset) * 1.0 / self.nranks)
        self.num_samples = int(per_rank)
        self.total_size = self.num_samples * self.nranks

    def __iter__(self):
        """Yield lists of sample indices, one list per mini-batch."""
        dataset_len = len(self.dataset)
        indices = np.arange(dataset_len).tolist()
        # pad with leading indices until the list has total_size entries
        padding = self.total_size - len(indices)
        indices += indices[:padding]
        assert len(indices) == self.total_size

        if self.shuffle:
            # seeding with the epoch makes the permutation reproducible
            np.random.RandomState(self.epoch).shuffle(indices)
            self.epoch += 1

        def _rank_local_indices(all_indices):
            # One "round" holds batch_size indices for each rank; whatever
            # does not fill a whole round is split evenly among the ranks.
            round_size = self.batch_size * self.nranks
            tail_size = self.total_size % round_size
            assert tail_size % self.nranks == 0
            tail_per_rank = tail_size // self.nranks
            head_end = len(all_indices) - tail_size

            picked = []
            start = self.local_rank * self.batch_size
            while start < head_end:
                picked.extend(all_indices[start:start + self.batch_size])
                start += round_size

            tail = all_indices[head_end:]
            tail_begin = self.local_rank * tail_per_rank
            picked.extend(tail[tail_begin:tail_begin + tail_per_rank])
            return picked

        if self.nranks > 1:
            indices = _rank_local_indices(indices)

        assert len(indices) == self.num_samples

        for begin in range(0, len(indices), self.batch_size):
            batch_indices = indices[begin:begin + self.batch_size]
            if len(batch_indices) == self.batch_size or not self.drop_last:
                yield batch_indices

    def __len__(self):
        """Return the number of mini-batches this rank iterates over."""
        if self.drop_last:
            return self.num_samples // self.batch_size
        # round up so the trailing partial batch is counted
        return (self.num_samples + self.batch_size - 1) // self.batch_size

    def set_epoch(self, epoch):
        """Set the epoch number used as the shuffling seed."""
        self.epoch = epoch
def _all_gather(x, nranks, ring_id=0, use_calc_stream=True):
    """Forward the arguments to ``collective._c_allgather`` and return its result."""
    gather_options = {'ring_id': ring_id, 'use_calc_stream': use_calc_stream}
    return collective._c_allgather(x, nranks, **gather_options)
def wait_server_ready(endpoints):
    """Block until every endpoint accepts a TCP connection.

    All endpoints are probed in turn (2 second timeout each); while at
    least one of them refuses the connection, the function sleeps for
    3 seconds and probes all of them again.

    Args:
        endpoints (iterable of str): ``"host:port"`` strings. A bare string
            is rejected by an assertion.
    """
    assert not isinstance(endpoints, six.string_types)

    def _accepts_connection(endpoint):
        host_and_port = endpoint.split(":")
        probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        with contextlib.closing(probe):
            probe.settimeout(2)
            status = probe.connect_ex(
                (host_and_port[0], int(host_and_port[1])))
            return status == 0

    while True:
        pending = [ep for ep in endpoints if not _accepts_connection(ep)]
        if not pending:
            break
        time.sleep(3)
def init_communicator(program, rank, nranks, wait_port, current_endpoint,
                      endpoints):
    """Append the communicator set-up ops to ``program``.

    Does nothing when ``nranks`` is below 2. Otherwise a ``c_gen_nccl_id``
    op and a ``c_comm_init`` op (ring 0) are appended to the program's
    global block.

    Args:
        program: program whose global block receives the two ops.
        rank (int): rank of this process.
        nranks (int): total number of ranks.
        wait_port (bool): when true, rank 0 first waits until all other
            endpoints accept connections.
        current_endpoint (str): endpoint of this process; must be present
            in ``endpoints``.
        endpoints (list[str]): endpoints of all ranks.
    """
    if nranks < 2:
        return

    peer_endpoints = endpoints[:]
    peer_endpoints.remove(current_endpoint)
    if rank == 0 and wait_port:
        wait_server_ready(peer_endpoints)

    main_block = program.global_block()
    nccl_id_var = main_block.create_var(
        name=fluid.unique_name.generate('nccl_id'),
        persistable=True,
        type=fluid.core.VarDesc.VarType.RAW)

    gen_id_attrs = {
        'rank': rank,
        'endpoint': current_endpoint,
        'other_endpoints': peer_endpoints
    }
    main_block.append_op(
        type='c_gen_nccl_id',
        inputs={},
        outputs={'Out': nccl_id_var},
        attrs=gen_id_attrs)

    comm_init_attrs = {
        'nranks': nranks,
        'rank': rank,
        'ring_id': 0,
    }
    main_block.append_op(
        type='c_comm_init',
        inputs={'X': nccl_id_var},
        outputs={},
        attrs=comm_init_attrs)
def prepare_distributed_context(place=None):
    """Initialise the collective communicator once per process.

    Builds a ``ParallelStrategy`` from the values reported by
    ``ParallelEnv``. For jobs with at least two ranks, the first call runs
    a small program that sets up the communicator on ``place``; later
    calls only rebuild and return the strategy.

    Args:
        place: device to initialise the communicator on. When ``None``, a
            ``fluid.CUDAPlace`` is chosen: device ``ParallelEnv().dev_id``
            for multi-rank jobs, device 0 otherwise.

    Returns:
        ParallelStrategy for multi-rank jobs, ``None`` when fewer than two
        ranks are configured.

    Raises:
        RuntimeError: if the communicator still has to be initialised and
            ``place`` is not a ``fluid.CUDAPlace``.
    """
    global _parallel_context_initialized

    if place is None:
        place = fluid.CUDAPlace(ParallelEnv().dev_id) if ParallelEnv().nranks > 1 \
            else fluid.CUDAPlace(0)

    strategy = ParallelStrategy()
    strategy.nranks = ParallelEnv().nranks
    strategy.local_rank = ParallelEnv().local_rank
    strategy.trainer_endpoints = ParallelEnv().trainer_endpoints
    strategy.current_endpoint = ParallelEnv().current_endpoint

    if strategy.nranks < 2:
        return

    if not _parallel_context_initialized:
        # The previous `assert ("...")` tested a non-empty string and so
        # could never fail; report the unsupported place explicitly, and
        # only when initialisation is actually pending.
        if not isinstance(place, fluid.CUDAPlace):
            raise RuntimeError("Only support CUDAPlace for now.")

        def _init_context():
            communicator_prog = fluid.Program()
            init_communicator(communicator_prog, strategy.local_rank,
                              strategy.nranks, True,
                              strategy.current_endpoint,
                              strategy.trainer_endpoints)
            exe = fluid.Executor(place)
            exe.run(communicator_prog)

        if fluid.in_dygraph_mode():
            # the set-up program is run with dygraph mode switched off,
            # then dygraph mode is re-enabled on the same place
            fluid.disable_dygraph()
            _init_context()
            fluid.enable_dygraph(place)
        else:
            _init_context()

        _parallel_context_initialized = True

    return strategy
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import os.path as osp
import shutil
import requests
import hashlib
import time
from collections import OrderedDict
from paddle.fluid.dygraph.parallel import ParallelEnv
try:
    from tqdm import tqdm
except ImportError:
    # tqdm is optional: fall back to a minimal progress reporter that
    # supports only what this module uses (context manager + update()).

    class tqdm(object):
        """Minimal stand-in for ``tqdm.tqdm`` that reports on stderr.

        Args:
            total (int|None): expected number of units; when it is ``None``
                or 0 the running count is printed instead of a percentage.
        """

        def __init__(self, total=None):
            self.total = total
            self.n = 0

        def update(self, n):
            """Advance the counter by ``n`` and redraw the progress line."""
            self.n += n
            if not self.total:
                # unknown (or zero) total: a percentage cannot be computed
                sys.stderr.write("\r{0:.1f} bytes".format(self.n))
            else:
                sys.stderr.write("\r{0:.1f}%".format(100 * self.n / float(
                    self.total)))
            sys.stderr.flush()

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            # finish the progress line; exceptions are not suppressed
            sys.stderr.write('\n')
import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Names exported by a star-import of this module.
__all__ = ['get_weights_path_from_url']
# Directory that get_weights_path_from_url() passes as the download root.
WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/hapi/weights")
# Maximum number of download attempts made by _download() before it raises.
DOWNLOAD_RETRY_LIMIT = 3
# Ordered mapping from NLP model name to the URL of its archive.
nlp_models = OrderedDict((
('RoBERTa-zh-base',
'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_ext_L-12_H-768_A-12.tar.gz'
),
('RoBERTa-zh-large',
'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.tar.gz'
),
('ERNIE-v2-en-base',
'https://ernie.bj.bcebos.com/ERNIE_Base_en_stable-2.0.0.tar.gz'),
('ERNIE-v2-en-large',
'https://ernie.bj.bcebos.com/ERNIE_Large_en_stable-2.0.0.tar.gz'),
('XLNet-cased-base',
'https://xlnet.bj.bcebos.com/xlnet_cased_L-12_H-768_A-12.tgz'),
('XLNet-cased-large',
'https://xlnet.bj.bcebos.com/xlnet_cased_L-24_H-1024_A-16.tgz'),
('ERNIE-v1-zh-base',
'https://baidu-nlp.bj.bcebos.com/ERNIE_stable-1.0.1.tar.gz'),
('ERNIE-v1-zh-base-max-len-512',
'https://ernie.bj.bcebos.com/ERNIE_1.0_max-len-512.tar.gz'),
('BERT-en-uncased-large-whole-word-masking',
'https://bert-models.bj.bcebos.com/wwm_uncased_L-24_H-1024_A-16.tar.gz'),
('BERT-en-cased-large-whole-word-masking',
'https://bert-models.bj.bcebos.com/wwm_cased_L-24_H-1024_A-16.tar.gz'),
('BERT-en-uncased-base',
'https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz'),
('BERT-en-uncased-large',
'https://bert-models.bj.bcebos.com/uncased_L-24_H-1024_A-16.tar.gz'),
('BERT-en-cased-base',
'https://bert-models.bj.bcebos.com/cased_L-12_H-768_A-12.tar.gz'),
('BERT-en-cased-large',
'https://bert-models.bj.bcebos.com/cased_L-24_H-1024_A-16.tar.gz'),
('BERT-multilingual-uncased-base',
'https://bert-models.bj.bcebos.com/multilingual_L-12_H-768_A-12.tar.gz'),
('BERT-multilingual-cased-base',
'https://bert-models.bj.bcebos.com/multi_cased_L-12_H-768_A-12.tar.gz'),
('BERT-zh-base',
'https://bert-models.bj.bcebos.com/chinese_L-12_H-768_A-12.tar.gz'), ))
def is_url(path):
    """
    Tell whether ``path`` is an HTTP(S) URL.

    Args:
        path (string): the string to test.

    Returns:
        bool: True when ``path`` starts with ``http://`` or ``https://``.
    """
    return path.startswith(('http://', 'https://'))
def get_weights_path_from_url(url, md5sum=None):
    """Return the local path of the weights file at ``url``.

    The file is looked up under ``WEIGHTS_HOME`` and downloaded from
    ``url`` when it is not there yet.

    Args:
        url (str): download url
        md5sum (str): md5 sum of the downloaded package

    Returns:
        str: local path of the downloaded weights.

    Examples:
        .. code-block:: python

            from paddle.incubate.hapi.download import get_weights_path_from_url

            resnet18_pretrained_weight_url = 'https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams'
            local_weight_path = get_weights_path_from_url(resnet18_pretrained_weight_url)
    """
    return get_path_from_url(url, WEIGHTS_HOME, md5sum)
def _map_path(url, root_dir):
    """Return the path under ``root_dir`` named after the last component of ``url``."""
    file_name = osp.basename(url)
    return osp.join(root_dir, file_name)
def get_path_from_url(url, root_dir, md5sum=None, check_exist=True):
    """Return the local path for ``url`` under ``root_dir``, downloading if needed.

    When the target already exists, ``check_exist`` is true and the md5
    check passes, the path is returned straight away. Otherwise the process
    with local rank 0 downloads the file, while every other rank polls once
    per second until the target path exists.

    Args:
        url (str): download url
        root_dir (str): root dir for downloading, it should be
            WEIGHTS_HOME or DATASET_HOME
        md5sum (str): md5 sum of the downloaded package
        check_exist (bool): whether an existing file may be reused instead
            of downloading again.

    Returns:
        str: local path of the downloaded file.
    """
    assert is_url(url), "downloading from {} not a url".format(url)

    # where the file ends up below root_dir
    fullpath = _map_path(url, root_dir)

    if osp.exists(fullpath) and check_exist and _md5check(fullpath, md5sum):
        logger.info("Found {}".format(fullpath))
        return fullpath

    if ParallelEnv().local_rank == 0:
        return _download(url, root_dir, md5sum)

    # other ranks wait for rank 0 to put the file in place
    while not os.path.exists(fullpath):
        time.sleep(1)
    return fullpath
def _download(url, path, md5sum=None):
    """
    Download ``url`` into directory ``path`` and return the file's path.

    The download is repeated until the file exists and passes the md5
    check; after ``DOWNLOAD_RETRY_LIMIT`` attempts a RuntimeError is raised.

    url (str): download url
    path (str): directory to download into (created when missing)
    md5sum (str): expected md5 sum, or None to skip the check
    """
    if not osp.exists(path):
        os.makedirs(path)

    fname = osp.split(url)[-1]
    fullname = osp.join(path, fname)

    attempts = 0
    while True:
        if osp.exists(fullname) and _md5check(fullname, md5sum):
            break

        if attempts >= DOWNLOAD_RETRY_LIMIT:
            raise RuntimeError("Download from {} failed. "
                               "Retry limit reached".format(url))
        attempts += 1

        logger.info("Downloading {} from {}".format(fname, url))

        response = requests.get(url, stream=True)
        if response.status_code != 200:
            raise RuntimeError("Downloading from {} failed with code "
                               "{}!".format(url, response.status_code))

        # Write to a temporary name first and move it into place once the
        # transfer has finished, so an interrupted download never leaves a
        # partial file under the final name.
        tmp_fullname = fullname + "_tmp"
        total_size = response.headers.get('content-length')
        with open(tmp_fullname, 'wb') as out_file:
            if total_size:
                # one progress unit per 1024-byte chunk
                chunk_total = (int(total_size) + 1023) // 1024
                with tqdm(total=chunk_total) as pbar:
                    for chunk in response.iter_content(chunk_size=1024):
                        out_file.write(chunk)
                        pbar.update(1)
            else:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        out_file.write(chunk)
        shutil.move(tmp_fullname, fullname)

    return fullname
def _md5check(fullname, md5sum=None):
    """Return whether the file ``fullname`` has the md5 digest ``md5sum``.

    The check is skipped (and True returned) when ``md5sum`` is None.
    """
    if md5sum is None:
        return True

    logger.info("File {} md5 checking...".format(fullname))
    digest = hashlib.md5()
    with open(fullname, 'rb') as f:
        # hash the file in 4 KiB pieces
        while True:
            piece = f.read(4096)
            if not piece:
                break
            digest.update(piece)
    calc_md5sum = digest.hexdigest()

    matches = calc_md5sum == md5sum
    if not matches:
        logger.info("File {} md5 check failed, {}(calc) != "
                    "{}(base)".format(fullname, calc_md5sum, md5sum))
    return matches
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import logging
from paddle.fluid.dygraph.parallel import ParallelEnv
def setup_logger(output=None, name="hapi", log_level=logging.INFO):
    """
    Initialize the hapi logger; its verbosity level defaults to "INFO".

    A stdout handler is attached only on local rank 0, and only when the
    logger has no handlers yet. A file handler is attached on every rank
    when ``output`` is given; ranks above 0 append ``.rank<N>`` to the file
    name.

    Args:
        output (str): a file name or a directory to save log. If None, will not save log file.
            If ends with ".txt" or ".log", assumed to be a file name.
            Otherwise, logs will be saved to `output/log.txt`.
        name (str): the root module name of this logger. Default: 'hapi'.
        log_level (enum): log level. eg.'INFO', 'DEBUG', 'ERROR'. Default: logging.INFO.

    Returns:
        logging.Logger: a logger
    """
    logger = logging.getLogger(name)
    logger.propagate = False
    logger.setLevel(log_level)

    format_str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

    # stdout logging: only local rank==0
    local_rank = ParallelEnv().local_rank
    if local_rank == 0 and len(logger.handlers) == 0:
        ch = logging.StreamHandler(stream=sys.stdout)
        ch.setLevel(log_level)
        ch.setFormatter(logging.Formatter(format_str))
        logger.addHandler(ch)

    # file logging if output is not None: all workers
    if output is not None:
        if output.endswith(".txt") or output.endswith(".log"):
            filename = output
        else:
            filename = os.path.join(output, "log.txt")

        if local_rank > 0:
            filename = filename + ".rank{}".format(local_rank)

        # dirname is '' for a bare file name; os.makedirs('') would raise
        log_dir = os.path.dirname(filename)
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)

        # FileHandler opens the path itself; StreamHandler expects an
        # already-open stream and cannot be given a file name.
        fh = logging.FileHandler(filename)
        fh.setLevel(log_level)
        fh.setFormatter(logging.Formatter(format_str))
        logger.addHandler(fh)

    return logger
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from paddle import fluid
from paddle.fluid.framework import in_dygraph_mode, Variable
from paddle.fluid.dygraph.base import to_variable
from .utils import to_list
# Names exported by a star-import of this module.
__all__ = ['Loss', 'CrossEntropy', 'SoftmaxWithCrossEntropy']
class Loss(object):
    """
    Base class for losses; subclasses implement :code:`forward`.

    Calling an instance normalizes outputs and labels to lists, runs
    :code:`forward` and reduces every returned loss to its mean
    (``average=True``) or its sum (``average=False``).

    Usage:
        custom_loss = CustomLoss()
        loss = custom_loss(inputs, labels)

    Examples:
        .. code-block:: python

            from paddle.incubate.hapi.loss import Loss
            from paddle import fluid

            class SoftmaxWithCrossEntropy(Loss):
                def __init__(self, average=True):
                    super(SoftmaxWithCrossEntropy, self).__init__(average)

                def forward(self, outputs, labels):
                    return [
                        fluid.layers.softmax_with_cross_entropy(
                            o, l, return_softmax=False) for o, l in zip(outputs, labels)
                    ]
    """

    def __init__(self, average=True):
        super(Loss, self).__init__()
        self.average = average

    def forward(self, outputs, labels):
        """Compute the un-reduced losses; must be overridden."""
        raise NotImplementedError()

    def __call__(self, outputs, labels=None):
        """Run :code:`forward` and return the list of reduced losses."""
        label_list = to_list(labels)
        if in_dygraph_mode() and label_list:
            label_list = [to_variable(item) for item in label_list]

        raw_losses = to_list(self.forward(to_list(outputs), label_list))

        if self.average:
            reduce_fn = fluid.layers.reduce_mean
        else:
            reduce_fn = fluid.layers.reduce_sum
        return [reduce_fn(item) for item in raw_losses]
class CrossEntropy(Loss):
    """
    Cross entropy loss, computed pairwise over outputs and labels with
    :code:`fluid.layers.cross_entropy`.

    Args:
        input (list[Variable]): Input tensor, the data type is float32,
            float64, int32, int64.
        label (list[Variable]): Label tensor, the data type is float32,
            float64, int32, int64.
        average (bool, optional): Indicate whether to average the loss, Default: True.

    Returns:
        list[Variable]: The tensor variable storing the cross_entropy_loss of inputs and labels.

    Examples:
        .. code-block:: python

            from paddle.incubate.hapi.model import Input
            from paddle.incubate.hapi.vision.models import LeNet
            from paddle.incubate.hapi.loss import CrossEntropy

            inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
            labels = [Input([None, 1], 'int64', name='label')]

            model = LeNet()
            loss = CrossEntropy()
            model.prepare(loss_function=loss, inputs=inputs, labels=labels)
    """

    def __init__(self, average=True):
        super(CrossEntropy, self).__init__(average)

    def forward(self, outputs, labels):
        """Return one cross-entropy loss per (output, label) pair."""
        pair_losses = []
        for output, label in zip(outputs, labels):
            pair_losses.append(fluid.layers.cross_entropy(output, label))
        return pair_losses
class SoftmaxWithCrossEntropy(Loss):
    """
    Loss that combines softmax and cross entropy, computed pairwise over
    outputs and labels with :code:`fluid.layers.softmax_with_cross_entropy`.

    Args:
        input (list[Variable]): Input tensor, the data type is float32,
            float64, int32, int64.
        label (list[Variable]): Label tensor, the data type is float32,
            float64, int32, int64.
        average (bool, optional): Indicate whether to average the loss, Default: True.

    Returns:
        list[Variable]: The tensor variable storing the cross_entropy_loss of inputs and labels.

    Examples:
        .. code-block:: python

            from paddle.incubate.hapi.model import Input
            from paddle.incubate.hapi.vision.models import LeNet
            from paddle.incubate.hapi.loss import SoftmaxWithCrossEntropy

            inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
            labels = [Input([None, 1], 'int64', name='label')]

            model = LeNet(classifier_activation=None)
            loss = SoftmaxWithCrossEntropy()
            model.prepare(loss_function=loss, inputs=inputs, labels=labels)
    """

    def __init__(self, average=True):
        super(SoftmaxWithCrossEntropy, self).__init__(average)

    def forward(self, outputs, labels):
        """Return one softmax-cross-entropy loss per (output, label) pair."""
        pair_losses = []
        for output, label in zip(outputs, labels):
            pair_losses.append(
                fluid.layers.softmax_with_cross_entropy(
                    output, label, return_softmax=False))
        return pair_losses
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import six
import abc
import numpy as np
import paddle.fluid as fluid
import logging
# Record format handed to logging.basicConfig() below.
FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
# NOTE(review): basicConfig() acts on the root logger, so importing this
# module changes process-wide logging set-up — confirm this is intended.
logging.basicConfig(level=logging.INFO, format=FORMAT)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Names exported by a star-import of this module.
__all__ = ['Metric', 'Accuracy']
@six.add_metaclass(abc.ABCMeta)
class Metric(object):
    r"""
    Base class for metrics; it defines the interface every metric implements.

    Usage:

        m = SomeMetric()
        for prediction, label in ...:
            m.update(prediction, label)
        m.accumulate()

    Advanced usage for :code:`add_metric_op`

    Metric calculation can be accelerated by computing the metric states
    from model outputs and labels with Paddle OPs in :code:`add_metric_op`;
    the metric states are then fetched as numpy arrays and :code:`update`
    is called with the states in numpy format.

    A metric is calculated as follows (operations in Model and Metric are
    indicated with curly brackets, while data nodes are not):

             inputs & labels              || ------------------
                   |                      ||
                {model}                   ||
                   |                      ||
            outputs & labels              ||
                   |                      ||    tensor data
           {Metric.add_metric_op}         ||
                   |                      ||
            metric states(tensor)         ||
                   |                      ||
             {fetch as numpy}             || ------------------
                   |                      ||
            metric states(numpy)          ||    numpy data
                   |                      ||
              {Metric.update}             \/ ------------------

    Examples:

        For the :code:`Accuracy` metric, which takes :code:`pred` and
        :code:`label` as inputs, the correct prediction matrix between
        :code:`pred` and :code:`label` can be computed in
        :code:`add_metric_op`. For example, if the prediction covers 10
        classes, the :code:`pred` shape is [N, 10], the :code:`label` shape
        is [N, 1], N is the mini-batch size, and only top-1 and top-5
        accuracy are needed, the correct prediction matrix of the top-5
        scores of each sample can be computed as follows; its shape is
        [N, 5].

        .. code-block:: python

            def add_metric_op(pred, label):
                # sort prediction and slice the top-5 scores
                pred = fluid.layers.argsort(pred, descending=True)[1][:, :5]
                # calculate whether the predictions are correct
                correct = pred == label
                return fluid.layers.cast(correct, dtype='float32')

        With :code:`add_metric_op`, part of the calculation is moved to OPs
        (which may run on GPU devices and be faster), and only 1 tensor of
        shape [N, 5] is fetched instead of 2 tensors of shapes [N, 10] and
        [N, 1].
        :code:`update` can then be defined as follows:

        .. code-block:: python

            def update(self, correct):
                accs = []
                for i, k in enumerate(self.topk):
                    num_corrects = correct[:, :k].sum()
                    num_samples = len(correct)
                    accs.append(float(num_corrects) / num_samples)
                    self.total[i] += num_corrects
                    self.count[i] += num_samples
                return accs
    """

    def __init__(self):
        pass

    @abc.abstractmethod
    def reset(self):
        """
        Reset states and result
        """
        owner = self.__class__.__name__
        raise NotImplementedError(
            "function 'reset' not implemented in {}.".format(owner))

    @abc.abstractmethod
    def update(self, *args):
        """
        Update states for metric

        The inputs of :code:`update` are the outputs of
        :code:`Metric.add_metric_op`. If :code:`add_metric_op` is not
        defined, the inputs of :code:`update` are the flattened **outputs**
        of the model and **labels** from data:
        :code:`update(output1, output2, ..., label1, label2,...)`

        see :code:`Metric.add_metric_op`
        """
        owner = self.__class__.__name__
        raise NotImplementedError(
            "function 'update' not implemented in {}.".format(owner))

    @abc.abstractmethod
    def accumulate(self):
        """
        Accumulates statistics, computes and returns the metric value
        """
        owner = self.__class__.__name__
        raise NotImplementedError(
            "function 'accumulate' not implemented in {}.".format(owner))

    @abc.abstractmethod
    def name(self):
        """
        Returns metric name
        """
        owner = self.__class__.__name__
        raise NotImplementedError(
            "function 'name' not implemented in {}.".format(owner))

    def add_metric_op(self, *args):
        """
        Advanced hook to accelerate metric calculation: the computation
        from model outputs to the states that the metric updates can be
        defined here, and Paddle OPs are supported. The outputs of this
        API become the inputs of "Metric.update".

        If :code:`add_metric_op` is defined, it is called with the
        **outputs** of the model and the **labels** from data; all outputs
        and labels are concatenated and flattened, and each field is passed
        as a separate argument:
        :code:`add_metric_op(output1, output2, ..., label1, label2,...)`

        If :code:`add_metric_op` is not defined, the default behaviour is
        to pass the inputs through unchanged, so the output format is:
        :code:`return output1, output2, ..., label1, label2,...`

        see :code:`Metric.update`
        """
        return args
class Accuracy(Metric):
    """
    Top-k accuracy metric.

    Args:
        topk (tuple of int): the k values to report accuracy for.
            Default: (1, ).
        name (str, optional): base name of the metric; 'acc' when not set.

    Examples:
        .. code-block:: python

            from paddle import fluid
            from paddle.incubate.hapi.metrics import Accuracy
            from paddle.incubate.hapi.loss import CrossEntropy
            from paddle.incubate.hapi.datasets import MNIST
            from paddle.incubate.hapi.model import Input
            from paddle.incubate.hapi.vision.models import LeNet

            fluid.enable_dygraph()

            train_dataset = MNIST(mode='train')

            model = LeNet()
            optim = fluid.optimizer.Adam(
                learning_rate=0.001, parameter_list=model.parameters())

            inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
            labels = [Input([None, 1], 'int64', name='label')]

            model.prepare(
                optim,
                loss_function=CrossEntropy(average=False),
                metrics=Accuracy(),
                inputs=inputs,
                labels=labels)

            model.fit(train_dataset, batch_size=64)
    """

    def __init__(self, topk=(1, ), name=None, *args, **kwargs):
        super(Accuracy, self).__init__(*args, **kwargs)
        self.topk = topk
        self.maxk = max(topk)
        self._init_name(name)
        self.reset()

    def add_metric_op(self, pred, label, *args):
        """Return a float32 tensor marking which top-``maxk`` predictions equal the label."""
        # argsort(...)[1] holds the sorted indices; keep the first maxk
        pred = fluid.layers.argsort(pred, descending=True)[1][:, :self.maxk]
        correct = pred == label
        return fluid.layers.cast(correct, dtype='float32')

    def update(self, correct, *args):
        """Accumulate one batch and return that batch's accuracy per k.

        Args:
            correct: 2-D correctness matrix with one row per sample
                (presumably the fetched output of ``add_metric_op`` —
                confirm against the caller).

        Returns:
            list[float]: accuracy of this batch for every k in ``topk``.
        """
        accs = []
        for i, k in enumerate(self.topk):
            num_corrects = correct[:, :k].sum()
            num_samples = len(correct)
            accs.append(float(num_corrects) / num_samples)
            self.total[i] += num_corrects
            self.count[i] += num_samples
        return accs

    def reset(self):
        """Clear the accumulated totals and sample counts."""
        self.total = [0.] * len(self.topk)
        self.count = [0] * len(self.topk)

    def accumulate(self):
        """Return the accuracy accumulated so far for every k in ``topk``.

        An entry is 0.0 when no sample has been seen for it yet, instead of
        failing with a division by zero.
        """
        res = []
        for t, c in zip(self.total, self.count):
            res.append(float(t) / c if c > 0 else 0.)
        return res

    def _init_name(self, name):
        # single top-1 metric keeps the plain name; otherwise one name per k
        name = name or 'acc'
        if self.maxk != 1:
            self._name = ['{}_top{}'.format(name, k) for k in self.topk]
        else:
            self._name = [name]

    def name(self):
        """Return the list of metric names, one per reported value."""
        return self._name
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import inspect
import os
import pickle
import numpy as np
import six
import warnings
from collections import Iterable
from paddle import fluid
from paddle.fluid.framework import in_dygraph_mode, Variable
from paddle.fluid.executor import global_scope
from paddle.fluid.io import is_belong_to_optimizer
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.fluid.layers.utils import flatten
from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy
from paddle.fluid.incubate.fleet.base import role_maker
from paddle.io import DataLoader, Dataset
from .loss import Loss
from .distributed import DistributedBatchSampler, _all_gather, prepare_distributed_context, _parallel_context_initialized
from .metrics import Metric
from .callbacks import config_callbacks
from .utils import to_list, to_numpy, flatten_list, restore_flatten_list
# Names exported by a star-import of this module.
__all__ = [
'Model',
'Input',
'set_device',
]
def set_device(device):
    """
    Create the place matching the requested device type.

    A CUDA place (device id taken from ``ParallelEnv``) is returned only
    when 'gpu' is requested and Paddle was compiled with CUDA; in every
    other case a CPU place is returned.

    Args:
        device (str): specify device type, 'cpu' or 'gpu'.

    Returns:
        fluid.CUDAPlace or fluid.CPUPlace: Created GPU or CPU place.
    """
    assert isinstance(device, six.string_types) and device.lower() in ['cpu', 'gpu'], \
        "Expected device in ['cpu', 'gpu'], but got {}".format(device)

    wants_gpu = device.lower() == 'gpu'
    if wants_gpu and fluid.is_compiled_with_cuda():
        return fluid.CUDAPlace(ParallelEnv().dev_id)
    return fluid.CPUPlace()
class Input(fluid.dygraph.Layer):
    """Description of one model input or label: its shape, dtype and name."""

    def __init__(self, shape=None, dtype=None, name=None):
        super(Input, self).__init__()
        self.shape, self.dtype, self.name = shape, dtype, name

    def forward(self):
        """Create the ``fluid.data`` variable described by this object."""
        return fluid.data(name=self.name, shape=self.shape, dtype=self.dtype)
class StaticGraphAdapter(object):
"""
Model training/inference with a static graph.
"""
def __init__(self, model):
    """Bind the adapter to ``model`` and set up empty bookkeeping state."""
    super(StaticGraphAdapter, self).__init__()
    self.model = model
    # with `_build_once` gone, parameters are created in `__init__`, so
    # remember the default programs that were current at this point
    self._startup_prog = fluid.default_startup_program()
    self._orig_prog = fluid.default_main_program()

    self._label_vars = {}  # label variables
    self._input_vars = {}  # input variables
    self._endpoints = {}
    self._loss_endpoint = None
    self._executor = None
    self._progs = {}
    self._compiled_progs = {}

    # counters, all starting at zero
    self._merge_count = dict.fromkeys(
        ('eval_total', 'test_total', 'eval_batch', 'test_batch'), 0)

    self._nranks = ParallelEnv().nranks
    self._local_rank = ParallelEnv().local_rank
@property
def mode(self):
    """Current mode, stored on the wrapped model."""
    wrapped = self.model
    return wrapped.mode

@mode.setter
def mode(self, new_mode):
    wrapped = self.model
    wrapped.mode = new_mode
def train_batch(self, inputs, labels=None):
    """Switch to 'train' mode and run one batch.

    The model must already have an optimizer (set by ``model.prepare()``).
    """
    optimizer = self.model._optimizer
    assert optimizer, \
        "model not ready, please call `model.prepare()` first"
    self.mode = 'train'
    result = self._run(inputs, labels)
    return result
def eval_batch(self, inputs, labels=None):
    """Switch to 'eval' mode and run one batch."""
    self.mode = 'eval'
    result = self._run(inputs, labels)
    return result
def test_batch(self, inputs):
self.mode = 'test'
return self._run(inputs, None)
def parameters(self, *args, **kwargs):
    """Return the wrapped model's parameters via the implementation above ``Model`` in its MRO."""
    parent_view = super(Model, self.model)
    return parent_view.parameters(*args, **kwargs)
def save(self, path):
    """Pickle the model parameters and the optimizer variables.

    Parameters go to ``path + ".pdparams"``. Optimizer variables go to
    ``path + ".pdopt"``, but only when a 'train' program and an optimizer
    exist and the program holds optimizer variables. Empty states are
    not written at all.

    Args:
        path (str): file path prefix in 'dirname/filename' format; the
            directory is created when missing.
    """

    def _dump(state, target):
        if not state:
            return
        serializable = {}
        for key, value in state.items():
            # Variables are stored as numpy arrays
            if isinstance(value, Variable):
                value = to_numpy(value)
            serializable[key] = value
        with open(target, 'wb') as f:
            pickle.dump(serializable, f)

    file_part = os.path.basename(path)
    assert file_part != "", "path should be of 'dirname/filename' format"
    dir_name = os.path.dirname(path)
    if dir_name and not os.path.exists(dir_name):
        os.makedirs(dir_name)

    _dump(self.model.state_dict(), path + ".pdparams")

    prog = self._progs.get('train', None)
    if prog is None or self.model._optimizer is None:
        return

    # XXX `optimizer.state_dict()` only work in dygraph mode
    optim = {
        var.name: var
        for var in prog.list_vars() if is_belong_to_optimizer(var)
    }
    if not optim:
        return

    _dump(optim, path + ".pdopt")
def load(self, param_state_pairs, optim_state):
    """
    Write parameter values, and optionally optimizer state, into the
    global scope.

    Args:
        param_state_pairs (list): (parameter, value) pairs to restore.
        optim_state (dict|None): saved optimizer state; skipped when empty
            or when the model has no optimizer.
    """
    # Fall back to a CPU executor when no executor has been created yet.
    exe = self._executor if self._executor is not None \
        else fluid.Executor(fluid.CPUPlace())
    executor = exe._default_executor

    # restore parameter states
    params = [param for param, _ in param_state_pairs]
    fluid.core._create_loaded_parameter(params, global_scope(), executor)
    for param, state in param_state_pairs:
        self._set_var(param, state)

    # restore optimizer states
    # FIXME what if a different optimizer is used?
    if self.model._optimizer and optim_state:
        self._load_optimizer(optim_state, executor)
def _load_optimizer(self, state, executor):
    """
    Restore the optimizer variables of the 'train' program from `state`.

    Variables are matched by name. For names that are not present in
    `state`, the dygraph-style key is derived (see inline comments) and
    its value is moved under the static-graph variable name.

    Args:
        state (dict): maps saved variable names to values.
        executor: passed to `fluid.core._create_loaded_parameter`.

    Raises:
        AssertionError: if an optimizer variable still has no entry after
            the name conversion.
        KeyError: if a derived dygraph key is absent from the state
            (raised by `dict.pop`).
    """
    prog = self._progs.get('train', None)
    optim = list(filter(is_belong_to_optimizer, prog.list_vars()))
    if not optim:
        return

    fluid.core._create_loaded_parameter(optim, global_scope(), executor)

    # Work on a copy so the caller's dict is left untouched.
    converted_state = dict(state)
    for var in optim:
        if var.name in ["@LR_DECAY_COUNTER@", "global_step"]:
            # When using learning rate scheduler, dygraph would name the
            # global step var as "global_step" to save, while static-graph
            # would has a state var named as "@LR_DECAY_COUNTER@".
            # NOTE: dygraph saved global_step is 1 larger than that in
            # static-graph, since the time of global_step to increase is
            # different.
            state_val = (
                np.array(converted_state.pop("global_step")) - 1
            ) if "global_step" in converted_state else converted_state.pop(
                "@LR_DECAY_COUNTER@", None)
            if state_val is not None:
                converted_state[var.name] = state_val
        elif var.name.startswith("learning_rate_"):
            # When using static learning rate, static-graph would make it
            # a persistable var named 'unique_name.generate("learning_rate")',
            # However, dygraph wouldn't save it.
            if var.name not in state:
                # Nothing saved for it: leave the variable as it is.
                continue
        else:
            # moment and other accumulators
            if var.name not in converted_state:
                # try to convert from dygraph name
                opt_name = self.model._optimizer._name
                opt_cls_name = self.model._optimizer.__class__.__name__
                opt_unq_name = None
                for name in self.model._optimizer._accumulators.keys():
                    # Strip the "<opt_name>_" prefix when the optimizer
                    # was given a name.
                    accum_name = name if opt_name is None else name[len(
                        opt_name) + 1:]
                    for param_name, state_var in self.model._optimizer._accumulators[
                            name].items():
                        if opt_unq_name is None:
                            # can not infer out the exact unique(opt_name),
                            # thus try to extract rather than generate
                            # Keys are scanned longest first; every match
                            # overwrites `opt_unq_name`.
                            for state_key in sorted(
                                    state.keys(),
                                    key=lambda x: len(x),
                                    reverse=True):
                                prefix = param_name + "_" + (
                                    opt_cls_name
                                    if opt_name is None else opt_name) + "_"
                                if state_key.startswith(prefix):
                                    prefix_offset = state_key[len(
                                        prefix):].find("_") + len(prefix)
                                    opt_unq_name = state_key[len(
                                        param_name + "_"):prefix_offset]
                                    # TODO: assert
                                    # assert opt_unq_name is None
                        # gen(param.name + "_" + gen(opt_name) + "_" + accum_name)
                        # always end with "_0" since the unique optimizer._name
                        dy_state_name = (param_name + "_" + opt_unq_name +
                                         "_" + accum_name + "_0")
                        # Move the value from the dygraph key to the
                        # static-graph variable name.
                        converted_state[
                            state_var.name] = converted_state.pop(
                                dy_state_name)

        assert var.name in converted_state, \
            "variable [{}] is not in optimizer state file".format(var.name)
        self._set_var(var, converted_state[var.name])
def _set_var(self, var, ndarray):
    """
    Copy `ndarray` into the global-scope tensor of `var`, keeping the
    tensor on the place it currently lives on.

    Args:
        var: variable whose `name` identifies the tensor in the global
            scope.
        ndarray: value to store in the tensor.
    """
    tensor = global_scope().find_var(var.name).get_tensor()
    current = tensor._place()
    if current.is_cpu_place():
        target = fluid.CPUPlace()
    elif current.is_cuda_pinned_place():
        target = fluid.CUDAPinnedPlace()
    else:
        # Go through a generic `Place` to read the GPU device id.
        generic = fluid.core.Place()
        generic.set_place(tensor._place())
        target = fluid.CUDAPlace(generic.gpu_device_id())
    tensor.set(ndarray, target)
def _run(self, inputs, labels=None):
    """
    Run the compiled program of the current mode on one batch.

    Args:
        inputs (list): batch data for the input variables of the current
            mode, in order. `None` entries are not fed.
        labels (list|None): batch data for the label variables of the
            current mode. Default: None.

    Returns:
        In 'test' mode, the list of fetched outputs as numpy arrays.
        Otherwise the list of losses, or a tuple (losses, metrics) when at
        least one metric was updated.

    Raises:
        AssertionError: if no program has been compiled for the current
            mode, or the number of inputs does not match the number of
            input variables.
    """
    compiled_prog = self._compiled_progs.get(self.mode, None)
    assert compiled_prog, \
        "Model is not ready, please call `model.prepare()` first"

    inputs = to_list(inputs)
    if labels is not None:
        labels = to_list(labels)
    assert len(inputs) == len(self._input_vars[self.mode]), \
        "number of inputs" \
        + " does not match number of arguments of `forward` method"

    feed = {}
    input_names = [v.name for v in self._input_vars[self.mode]]
    for name, value in zip(input_names, inputs):
        # train and test may take different arguments
        if value is not None:
            feed[name] = value
    if labels is not None:
        for idx, v in enumerate(self._label_vars[self.mode]):
            feed[v.name] = labels[idx]

    endpoints = self._endpoints[self.mode]
    if self.mode == 'test':
        fetch_list = endpoints['output']
    else:
        metric_list, metric_splits = flatten_list(endpoints['metric'])
        fetch_list = endpoints['loss'] + metric_list
        num_loss = len(endpoints['loss'])

    # if fetch Variable is same as input Variable, do not fetch
    # from program, get it from input directly
    pruned_fetch_list = []
    pruned_fetch_idx_name_map = [""] * len(fetch_list)
    for i, fetch_var in enumerate(fetch_list):
        if fetch_var.name in feed:
            pruned_fetch_idx_name_map[i] = fetch_var.name
        else:
            pruned_fetch_list.append(fetch_var)

    rets = self._executor.run(compiled_prog,
                              feed=feed,
                              fetch_list=pruned_fetch_list,
                              return_numpy=False)

    # restore pruned fetch_list Variable from feeds
    for i, name in enumerate(pruned_fetch_idx_name_map):
        if len(name) > 0:
            rets.insert(i, feed[name])

    # LoDTensor cannot be fetch as numpy directly
    rets = [np.array(v) for v in rets]
    if self.mode == 'test':
        return rets[:]

    losses = rets[:num_loss]
    metric_states = restore_flatten_list(rets[num_loss:], metric_splits)

    test_loader = self.model._test_dataloader
    cut_padding = self.mode != 'train' and test_loader is not None \
        and isinstance(test_loader, DataLoader) and self._nranks > 1
    total_key = self.mode + '_total'
    batch_key = self.mode + '_batch'

    # The sample counter describes the batch, not a metric: it is read
    # once and written once per call. Updating it inside the metric loop
    # would advance it once per metric.
    seen = None
    total_size = None
    metrics = []
    for metric, state in zip(self.model._metrics, metric_states):
        # cut off padding size
        if cut_padding:
            first_metric = seen is None
            if first_metric:
                total_size = len(test_loader.dataset)
                seen = self._merge_count.get(total_key, 0)
            # TODO: fixme if have better way to get batch size
            samples = state[0].shape[0]
            if seen + samples >= total_size:
                # Last batch: keep only the samples still missing.
                batch_count = int(total_size - seen)
                state = [s[:batch_count, ...] for s in state]
                next_total = 0
            else:
                batch_count = samples
                next_total = seen + samples
            if first_metric:
                self._merge_count[total_key] = next_total
                self._merge_count[batch_key] = batch_count

        metrics.append(metric.update(*state))
    return (losses, metrics) if len(metrics) > 0 else losses
def prepare(self):
    """Build, then compile and initialize, the program of every mode."""
    for mode in ('train', 'eval', 'test'):
        self._make_program(mode)
        prog = self._progs[mode]
        self._compile_and_initialize(prog, mode)
def _make_program(self, mode):
    """
    Build the program for `mode` once and record its inputs and endpoints.

    The program is cloned from the original main program, the model's
    `forward` is traced into it and, depending on the mode, loss, metric
    and optimizer ops are added. Results are stored in `_progs`,
    `_input_vars`, `_label_vars` and `_endpoints` under `mode`.

    Args:
        mode (str): compared against 'train' and 'test' below; `prepare`
            passes 'train', 'eval' and 'test'.
    """
    prog = self._progs.get(mode, None)
    if prog is not None:
        # Already built for this mode.
        return

    prog = self._orig_prog.clone()
    # NOTE: When defining learning rate scheduling in static-graph, ops to
    # increase the global step var and calculate learning rate would be
    # prepended into _orig_prog. test program maked by `_orig_prog.clone`
    # also would include these ops. Thus must prune these ops in test
    # program, otherwise the global step would be changed in test.
    if mode != 'train':
        # Removes the op at index 0 once per op in the block, which
        # empties the block.
        for op in list(prog.global_block().ops):
            prog.global_block()._remove_op(0)
    if mode == 'train' and self.model._optimizer \
            and self.model._optimizer._learning_rate_map:
        # HACK workaround learning rate map issue
        lr_var = self.model._optimizer._learning_rate_map[self._orig_prog]
        new_lr_var = prog.global_block().vars[lr_var.name]
        self.model._optimizer._learning_rate_map[prog] = new_lr_var

    losses = []
    metrics = []
    with fluid.program_guard(prog, self._startup_prog):
        ins = self.model._inputs
        lbls = self.model._labels if self.model._labels else []
        # Calling `forward()` on each entry yields the variable it
        # describes.
        inputs = [k.forward() for k in to_list(ins)]
        labels = [k.forward() for k in to_list(lbls)]
        self._label_vars[mode] = labels
        outputs = to_list(self.model.forward(*inputs))

        if mode != 'test' and self.model._loss_function:
            losses = self.model._loss_function(outputs, labels)

        # With several ranks, outputs (and labels, except in 'test') are
        # passed through `_all_gather` for the non-training modes.
        if self._nranks > 1 and mode != 'train':
            outputs = [_all_gather(o, self._nranks) for o in outputs]
            if mode != 'test':
                labels = [_all_gather(l, self._nranks) for l in labels]

        if mode != 'test':
            for metric in self.model._metrics:
                metrics.append(
                    to_list(metric.add_metric_op(*(outputs + labels))))

        if mode == 'train' and self.model._optimizer:
            self._loss_endpoint = fluid.layers.sum(losses)
            if self._nranks > 1:
                # Wrap the optimizer with the fleet distributed optimizer.
                role = role_maker.PaddleCloudRoleMaker(is_collective=True)
                fleet.init(role)
                dist_strategy = DistributedStrategy()
                dist_strategy.mode = "collective"
                dist_strategy.collective_mode = "grad_allreduce"
                self.model._optimizer = fleet.distributed_optimizer(
                    self.model._optimizer, strategy=dist_strategy)

            self.model._optimizer.minimize(self._loss_endpoint)

    if mode != 'train':  # clone again to put it in test mode
        prog = prog.clone(for_test=True)

    self._input_vars[mode] = inputs

    self._progs[mode] = prog
    self._endpoints[mode] = {
        "output": outputs,
        "loss": losses,
        "metric": metrics
    }
def _compile_and_initialize(self, prog, mode):
    """
    Compile `prog` for `mode`, creating the executor and running the
    startup program on the first call.

    Args:
        prog: the program built for `mode`.
        mode (str): key under which the compiled program is cached.

    Returns:
        The cached compiled program if one already exists for `mode`,
        otherwise None.

    Raises:
        AssertionError: if the model's place is None.
    """
    cached = self._compiled_progs.get(mode, None)
    if cached is not None:
        return cached

    assert self.model._place is not None, \
        "device is not set, please call `model.prepare()` first"
    place = self.model._place

    # XXX *ALL WEIGHTS* should be initialized upon model construction
    # even if `forward()` may run different code path for different mode
    # therefore startup program only needs to run once
    if self._executor is None:
        self._executor = fluid.Executor(place)
        # XXX incremental initialization
        uninitialized = []
        for startup_var in self._startup_prog.list_vars():
            scope_var = fluid.global_scope().find_var(startup_var.name)
            ready = not startup_var.name.startswith('nccl_id') \
                and scope_var \
                and scope_var.get_tensor()._is_initialized()
            if not ready:
                uninitialized.append(startup_var)
        if uninitialized:
            startup_prog = self._startup_prog._prune(uninitialized)
            self._executor.run(startup_prog)

    # Only a single-rank run wraps the program in a `CompiledProgram`.
    compiled = fluid.CompiledProgram(prog) if self._nranks < 2 else prog
    self._compiled_progs[mode] = compiled
class DynamicGraphAdapter(object):
def __init__(self, model):
    """
    Bind the adapter to `model`; with more than one rank, also wrap the
    model in `DataParallel`.

    Args:
        model (Model): the model run by this adapter.
    """
    super(DynamicGraphAdapter, self).__init__()
    self.model = model

    env = ParallelEnv()
    self._nranks = env.nranks
    self._local_rank = env.local_rank
    # Sample counters read and updated by `eval_batch`.
    self._merge_count = dict.fromkeys(
        ['eval_total', 'test_total', 'eval_batch', 'test_batch'], 0)

    if self._nranks > 1:
        strategy = fluid.dygraph.parallel.ParallelStrategy()
        strategy.nranks = env.nranks
        strategy.local_rank = env.local_rank
        strategy.trainer_endpoints = env.trainer_endpoints
        strategy.current_endpoint = env.current_endpoint
        self.ddp_model = fluid.dygraph.parallel.DataParallel(self.model,
                                                             strategy)
@property
def mode(self):
    """Current running mode; read from the wrapped model's `mode`."""
    return self.model.mode

@mode.setter
def mode(self, value):
    # The mode is stored on the model, not on the adapter.
    self.model.mode = value
# TODO multi device in dygraph mode not implemented at present time
def train_batch(self, inputs, labels=None):
    """
    Run one forward/backward/optimizer step in dygraph mode.

    Args:
        inputs (list): batch data, converted with `to_variable`.
        labels (list|None): batch labels, converted with `to_variable`.
            Default: None.

    Returns:
        The list of losses as numpy arrays, or a tuple (losses, metrics)
        when the model has metrics.

    Raises:
        AssertionError: if the model has no optimizer set.
    """
    assert self.model._optimizer, \
        "model not ready, please call `model.prepare()` first"
    super(Model, self.model).train()
    self.mode = 'train'
    inputs = to_list(inputs)
    if labels is not None:
        labels = [to_variable(l) for l in to_list(labels)]

    # With several ranks the forward pass goes through the DataParallel
    # wrapper, the loss is scaled and gradients are collected.
    distributed = self._nranks > 1
    network = self.ddp_model if distributed else self.model
    outputs = network.forward(*[to_variable(x) for x in inputs])
    losses = self.model._loss_function(outputs, labels)
    final_loss = fluid.layers.sum(losses)
    if distributed:
        final_loss = self.ddp_model.scale_loss(final_loss)
    final_loss.backward()
    if distributed:
        self.ddp_model.apply_collective_grads()

    self.model._optimizer.minimize(final_loss)
    self.model.clear_gradients()

    metrics = []
    for metric in self.model._metrics:
        metric_args = to_list(outputs) + to_list(labels)
        metric_outs = metric.add_metric_op(*metric_args)
        m = metric.update(*[to_numpy(m) for m in to_list(metric_outs)])
        metrics.append(m)

    if len(metrics) > 0:
        return ([to_numpy(l) for l in losses], metrics)
    return [to_numpy(l) for l in losses]
def eval_batch(self, inputs, labels=None):
    """
    Run one evaluation step in dygraph mode.

    Args:
        inputs (list): batch data, converted with `to_variable`.
        labels (list|None): batch labels, converted with `to_variable`.
            Default: None.

    Returns:
        The list of losses as numpy arrays (empty when the model has no
        loss function), or a tuple (losses, metrics) when the model has
        metrics.
    """
    super(Model, self.model).eval()
    self.mode = 'eval'
    inputs = to_list(inputs)
    if labels is not None:
        labels = [to_variable(l) for l in to_list(labels)]

    outputs = self.model.forward(*[to_variable(x) for x in inputs])
    if self.model._loss_function:
        losses = self.model._loss_function(outputs, labels)
    else:
        losses = []
    if self._nranks > 1:
        outputs = [_all_gather(o, self._nranks) for o in to_list(outputs)]
        # Labels are optional; only gather them when they were given.
        if labels is not None:
            labels = [_all_gather(l, self._nranks) for l in labels]

    # cut off padding value.
    # Done once per batch, before the metric loop: outputs and labels are
    # shared by all metrics, so the counters must not advance per metric.
    test_loader = self.model._test_dataloader
    if self.model._metrics and test_loader is not None \
            and self._nranks > 1 and isinstance(test_loader, DataLoader):
        total_size = len(test_loader.dataset)
        samples = outputs[0].shape[0]
        current_count = self._merge_count.get(self.mode + '_total', 0)
        if current_count + samples >= total_size:
            # Last batch: keep only the samples still missing.
            valid = int(total_size - current_count)
            outputs = [o[:valid] for o in outputs]
            if labels is not None:
                labels = [l[:valid] for l in labels]
            self._merge_count[self.mode + '_total'] = 0
            self._merge_count[self.mode + '_batch'] = valid
        else:
            self._merge_count[self.mode + '_total'] += samples
            self._merge_count[self.mode + '_batch'] = samples

    metrics = []
    for metric in self.model._metrics:
        metric_outs = metric.add_metric_op(*(to_list(outputs) + to_list(
            labels)))
        m = metric.update(*[to_numpy(m) for m in to_list(metric_outs)])
        metrics.append(m)

    # To be consistent with static graph
    # return empty loss if loss_function is None
    return ([to_numpy(l) for l in losses], metrics) \
        if len(metrics) > 0 else [to_numpy(l) for l in losses]
def test_batch(self, inputs):
    """
    Run one inference step in dygraph mode.

    Args:
        inputs (list): batch data, converted with `to_variable`.

    Returns:
        The list of model outputs as numpy arrays.
    """
    super(Model, self.model).eval()
    self.mode = 'test'
    variables = [to_variable(x) for x in to_list(inputs)]
    outputs = self.model.forward(*variables)
    # Outputs are gathered only for multi-rank runs on a CUDA place.
    gather = self._nranks > 1 and isinstance(self.model._place,
                                             fluid.CUDAPlace)
    if gather:
        outputs = [_all_gather(o, self._nranks) for o in to_list(outputs)]
    return [to_numpy(o) for o in to_list(outputs)]
def parameters(self, *args, **kwargs):
    """Return the wrapped model's parameters, forwarding all arguments."""
    # `super(Model, ...)` skips `Model.parameters` and dispatches to the
    # next class in the model's MRO.
    return super(Model, self.model).parameters(*args, **kwargs)
def save(self, path):
    """
    Save the model state and, if non-empty, the optimizer state with
    `fluid.save_dygraph`.

    Args:
        path (str): file prefix handed to `fluid.save_dygraph`.
    """
    fluid.save_dygraph(self.model.state_dict(), path)

    optimizer = self.model._optimizer
    if optimizer is None:
        return
    # An empty optimizer state is not saved.
    if not optimizer.state_dict():
        return
    fluid.save_dygraph(optimizer.state_dict(), path)
def load(self, param_state_pairs, optim_state):
    """
    Set parameter values and, if possible, restore the optimizer state.

    The optimizer state is extended with dygraph-style keys derived from
    the saved keys before it is passed to the optimizer's `set_dict`.

    Args:
        param_state_pairs (list): (parameter, value) pairs to restore.
        optim_state (dict|None): saved optimizer state; skipped when empty
            or when the model has no optimizer.
    """
    # restore parameter states
    for param, state in param_state_pairs:
        param.set_value(state)

    # restore optimizer states
    if not self.model._optimizer or not optim_state:
        return

    # If optimizer performs set_dict when state vars haven't been created,
    # which would happen when set_dict before minimize, the state would be
    # stored in optimizer._accumulators_holder and loaded lazily.
    # To handle this when loading from static-graph saved states, extend
    # state dict to include keys named according to dygraph naming rules.
    # TODO: if len(self.model._optimizer._accumulators) > 0
    converted_state = dict(optim_state)
    opt_unq_name = self.model._optimizer._name
    if opt_unq_name is None:
        opt_unq_name = ''

    opt_cls_name = self.model._optimizer.__class__.__name__
    opt_name = opt_unq_name[:opt_unq_name.rfind("_")]  # remove suffix idx
    param_names = [param.name for param in self.model.parameters()]

    # Saved keys are visited longest first.
    for var_name, state_var in sorted(
            optim_state.items(), key=lambda x: len(x[0]), reverse=True):
        if var_name in ["@LR_DECAY_COUNTER@", "global_step"]:
            # NOTE: dygraph saved global_step is 1 larger than that in
            # static-graph, since the time of global_step to increase is
            # different.
            if var_name == "@LR_DECAY_COUNTER@":
                converted_state["global_step"] = np.array(
                    converted_state.pop("@LR_DECAY_COUNTER@")) + 1
        else:
            # moment and other accumulators
            # extend state dict to include promising dygraph names
            for param_name in param_names:
                if var_name.startswith(param_name + "_" + opt_name):
                    # when init optimizer with name
                    accum_name = var_name[len(param_name + "_" + opt_name +
                                              "_"):]
                elif var_name.startswith(param_name +
                                         "_") and opt_name == opt_cls_name:
                    # when init optimizer without name
                    accum_name = var_name[len(param_name + "_"):]
                else:
                    # This key does not belong to this parameter.
                    continue
                # remove suffix idx
                accum_name = accum_name[:accum_name.rfind("_")]
                # state names always end with "_0" in dygraph because of the
                # unique optimizer._name
                dy_state_name = (param_name + "_" + opt_unq_name + "_" +
                                 accum_name + "_0")
                # The original key is kept; the dygraph key is added.
                converted_state[dy_state_name] = state_var

    self.model._optimizer.set_dict(converted_state)
class Model(fluid.dygraph.Layer):
"""
An Model object is network with training and inference features.
Dynamic graph and static graph are supported at the same time,
switched by `fluid.enable_dygraph()`. The usage is as follows.
But note, the switching between dynamic and static should be before
instantiating a Model. The input description, i.e, hapi.Input,
must be required for static graph.
Usage:
.. code-block:: python
import numpy as np
import paddle
import paddle.fluid as fluid
#import paddle.incubate.hapi as hapi
from paddle.incubate.hapi import Model, Input, set_device
from paddle.incubate.hapi.loss import CrossEntropy
from paddle.incubate.hapi.dataset import MNIST
class MyModel(Model):
def __init__(self):
super(MyModel, self).__init__()
self._fc = fluid.dygraph.Linear(784, 10, act='softmax')
def forward(self, x):
y = self._fc(x)
return y
device = set_device('gpu')
# if use static graph, do not set
fluid.enable_dygraph(device)
model = MyModel()
optim = fluid.optimizer.SGD(learning_rate=1e-3,
parameter_list=model.parameters())
inputs = [Input([None, 784], 'float32', name='x')]
labels = [Input([None, 1], 'int64', name='label')]
mnist_data = MNIST(mode='train')
model.prepare(optim,
CrossEntropy(average=True),
hapi.metrics.Accuracy(),
inputs,
labels,
device=device)
model.fit(mnist_data, epochs=2, batch_size=32, verbose=1)
"""
def __init__(self):
    """
    Create an unprepared model and choose the backend adapter.

    All configuration attributes start empty and are filled in by
    `prepare()`. The adapter is chosen from the graph mode that is active
    at construction time.
    """
    super(Model, self).__init__(self.__class__.__name__)
    self.mode = 'train'
    self._inputs = None
    self._labels = None
    self._loss_function = None
    self._loss_weights = None
    self._optimizer = None
    # The adapters read `_metrics` and `_place` (the static one asserts
    # `_place is not None`), so both must exist before `prepare()` is
    # called; otherwise those reads fail with AttributeError.
    self._metrics = []
    self._device = None
    self._place = None
    self._test_dataloader = None

    # init backend
    if fluid.in_dygraph_mode():
        self._adapter = DynamicGraphAdapter(self)
    else:
        self._adapter = StaticGraphAdapter(self)
def train_batch(self, inputs, labels=None):
    """
    Run one training step on a batch of data.

    Args:
        inputs (list): A list of numpy.ndarray, each is a batch of
            input data.
        labels (list): A list of numpy.ndarray, each is a batch of
            input label. If has no labels, set None. Default is None.

    Returns:
        A list of scalar training loss if the model has no metrics,
        or a tuple (list of scalar loss, list of metrics) if the model
        set metrics.

    Examples:

        .. code-block:: python

          import numpy as np
          import paddle.fluid as fluid
          from paddle.incubate.hapi import Model, Input, set_device
          from paddle.incubate.hapi.loss import CrossEntropy

          class MyModel(Model):
              def __init__(self):
                  super(MyModel, self).__init__()
                  self._fc = fluid.dygraph.Linear(784, 10, act='softmax')
              def forward(self, x):
                  y = self._fc(x)
                  return y

          device = set_device('gpu')
          fluid.enable_dygraph(device)

          model = MyModel()
          optim = fluid.optimizer.SGD(learning_rate=1e-3,
              parameter_list=model.parameters())

          inputs = [Input([None, 784], 'float32', name='x')]
          labels = [Input([None, 1], 'int64', name='label')]
          model.prepare(optim,
                        CrossEntropy(average=True),
                        inputs=inputs,
                        labels=labels,
                        device=device)
          data = np.random.random(size=(4,784)).astype(np.float32)
          label = np.random.randint(0, 10, size=(4, 1)).astype(np.int64)
          loss = model.train_batch([data], [label])
          print(loss)
    """
    # The work is done by the graph-mode specific adapter.
    return self._adapter.train_batch(inputs, labels)
def eval_batch(self, inputs, labels=None):
    """
    Run one evaluating step on a batch of data.

    Args:
        inputs (list): A list of numpy.ndarray, each is a batch of
            input data.
        labels (list): A list of numpy.ndarray, each is a batch of
            input label. If has no labels, set None. Default is None.

    Returns:
        A list of scalar testing loss if the model has no metrics,
        or a tuple (list of scalar loss, list of metrics) if the model
        set metrics.

    Examples:

        .. code-block:: python

          import numpy as np
          import paddle.fluid as fluid
          from paddle.incubate.hapi import Model, Input, set_device
          from paddle.incubate.hapi.loss import CrossEntropy

          class MyModel(Model):
              def __init__(self):
                  super(MyModel, self).__init__()
                  self._fc = fluid.dygraph.Linear(784, 10, act='softmax')
              def forward(self, x):
                  y = self._fc(x)
                  return y

          device = set_device('gpu')
          fluid.enable_dygraph(device)

          model = MyModel()
          optim = fluid.optimizer.SGD(learning_rate=1e-3,
              parameter_list=model.parameters())

          inputs = [Input([None, 784], 'float32', name='x')]
          labels = [Input([None, 1], 'int64', name='label')]
          model.prepare(optim,
                        CrossEntropy(average=True),
                        inputs=inputs,
                        labels=labels,
                        device=device)
          data = np.random.random(size=(4,784)).astype(np.float32)
          label = np.random.randint(0, 10, size=(4, 1)).astype(np.int64)
          loss = model.eval_batch([data], [label])
          print(loss)
    """
    # The work is done by the graph-mode specific adapter.
    return self._adapter.eval_batch(inputs, labels)
def test_batch(self, inputs):
    """
    Run one testing step on a batch of data.

    Args:
        inputs (list): A list of numpy.ndarray, each is a batch of
            input data.

    Returns:
        A list of numpy.ndarray of predictions, that is the outputs
        of Model forward.

    Examples:

        .. code-block:: python

          import numpy as np
          import paddle.fluid as fluid
          from paddle.incubate.hapi import Model, Input, set_device

          class MyModel(Model):
              def __init__(self):
                  super(MyModel, self).__init__()
                  self._fc = fluid.dygraph.Linear(784, 1, act='softmax')
              def forward(self, x):
                  y = self._fc(x)
                  return y

          device = set_device('gpu')
          fluid.enable_dygraph(device)

          model = MyModel()
          inputs = [Input([None, 784], 'float32', name='x')]
          model.prepare(inputs=inputs,
                        device=device)
          data = np.random.random(size=(4,784)).astype(np.float32)
          out = model.test_batch([data])
          print(out)
    """
    # The work is done by the graph-mode specific adapter.
    return self._adapter.test_batch(inputs)
def save(self, path):
    """
    Save the parameters and the optimizer information under `path`.

    The parameters are written to a file with suffix ".pdparams". The
    optimizer information, i.e. the variables used by the optimizer, is
    written to a file with suffix ".pdopt"; that file is not generated
    when the optimizer has no variable to save (like SGD).

    Existing files at the target location are silently overwritten.
    Only the process whose local rank is 0 saves anything.

    Args:
        path (str): The file prefix to save model. The format is
            'dirname/file_prefix' or 'file_prefix'. An exception is
            raised if it is an empty string.

    Returns:
        None

    Examples:

        .. code-block:: python

          import paddle.fluid as fluid
          from paddle.incubate.hapi import Model, set_device

          class MyModel(Model):
              def __init__(self):
                  super(MyModel, self).__init__()
                  self._fc = fluid.dygraph.Linear(784, 1, act='softmax')
              def forward(self, x):
                  y = self._fc(x)
                  return y

          device = set_device('cpu')
          fluid.enable_dygraph(device)
          model = MyModel()
          model.save('checkpoint/test')
    """
    if ParallelEnv().local_rank != 0:
        return
    self._adapter.save(path)
def load(self, path, skip_mismatch=False, reset_optimizer=False):
    """
    Load from files storing the model states and optimizer states. The file
    for optimizer states is not necessary if no need to restore the optimizer.

    NOTE: parameters are retrieved out from the file storing model states
    according to their structured names.

    For fine-tuning or transfer-learning models where some of the layers have
    changed, keep parameters needed to restore have same structured names in
    the pre-trained model and fine-tuning model.

    NOTE: the files are read with `pickle`; only load files from a trusted
    source.

    Args:
        path (str): The prefix of files storing the model states and
            optimizer states. The files would be `path.pdparams` and
            `path.pdopt` separately, and the latter is not necessary
            when no need to restore.
        skip_mismatch (bool): Whether to skip the loading of mismatch
            parameter or raise an error when mismatch happens (not found
            the parameter in file storing model states of or receives a
            mismatch shape).
        reset_optimizer (bool): If True, ignore the providing file storing
            optimizer states and initialize optimizer states from scratch.
            Otherwise, restore optimizer states from `path.pdopt` if
            a optimizer has been set to the model. Default False.

    Returns:
        None

    Raises:
        AssertionError: if the parameter file is missing or empty, or the
            path has an unknown postfix.
        ValueError: if a parameter is missing from the file or has a
            different shape, and `skip_mismatch` is False.

    Examples:

        .. code-block:: python

          import paddle.fluid as fluid
          from paddle.incubate.hapi import Model, set_device

          class MyModel(Model):
              def __init__(self):
                  super(MyModel, self).__init__()
                  self._fc = fluid.dygraph.Linear(784, 1, act='softmax')
              def forward(self, x):
                  y = self._fc(x)
                  return y

          device = set_device('cpu')
          fluid.enable_dygraph(device)
          model = MyModel()
          model.load('checkpoint/test')
    """

    def _load_state_from_path(path):
        # A missing file yields None.
        if not os.path.exists(path):
            return
        with open(path, 'rb') as f:
            return pickle.load(f) if six.PY2 else pickle.load(
                f, encoding='latin1')

    def _check_match(key, param):
        state = param_state.get(key, None)
        if state is None:
            raise ValueError(
                "{} is not found in the providing file.".format(key))
        if list(state.shape) != list(param.shape):
            raise ValueError(
                "{} receives a shape {}, but the expected shape is {}.".
                format(key, list(state.shape), list(param.shape)))
        return param, state

    def _strip_postfix(path):
        path, ext = os.path.splitext(path)
        assert ext in ['', '.pdparams', '.pdopt', '.pdmodel'], \
            "Unknown postfix {} from weights".format(ext)
        return path

    path = _strip_postfix(path)
    param_state = _load_state_from_path(path + ".pdparams")
    assert param_state, "Failed to load parameters, please check path."

    matched_param_state = []
    for key, param in self.state_dict().items():
        try:
            match_res = _check_match(key, param)
        except ValueError as err:
            if not skip_mismatch:
                raise
            warnings.warn(
                ("Skip loading for {}. ".format(key) + str(err)))
            # reset optimizer when mismatch happens
            reset_optimizer = True
        else:
            # Only parameters that really matched are restored; a skipped
            # one must not re-append the pair of the previous iteration.
            matched_param_state.append(match_res)

    optim_state = None if reset_optimizer else _load_state_from_path(
        path + ".pdopt")
    return self._adapter.load(matched_param_state, optim_state)
def parameters(self, *args, **kwargs):
    """
    Returns a list of parameters of the model.

    All positional and keyword arguments are forwarded to the adapter's
    `parameters` method.

    Returns:
        A list of Parameter in static graph.
        A list of ParamBase in dynamic graph.

    Examples:

        .. code-block:: python

          import paddle.fluid as fluid
          from paddle.incubate.hapi.model import Model, Input, set_device

          class MyModel(Model):
              def __init__(self):
                  super(MyModel, self).__init__()
                  self._fc = fluid.dygraph.Linear(20, 10, act='softmax')
              def forward(self, x):
                  y = self._fc(x)
                  return y

          fluid.enable_dygraph()
          model = MyModel()
          params = model.parameters()
    """
    # Forward the arguments instead of silently dropping them.
    return self._adapter.parameters(*args, **kwargs)
def prepare(self,
            optimizer=None,
            loss_function=None,
            metrics=None,
            inputs=None,
            labels=None,
            device=None):
    """
    Configures the model before running.

    Args:
        optimizer (Optimizer|None): Optimizer must be set in training
            and should be a Optimizer instance. It can be None in eval
            and test mode.
        loss_function (Loss|None): Loss function must be set in training
            and should be a Loss instance. It can be None when there is
            no loss.
        metrics (Metric|list of Metric|None): If metrics is set, all
            metrics will be calculated and output in train/eval mode.
        inputs (Input|list|dict|None): `inputs`, entry points of network,
            could be a Input layer, or list of Input layers,
            or dict (name: Input), or None. For static graph,
            inputs must be set. For dynamic graph, it could be None.
        labels (Input|list|None): `labels`, entry points of network,
            could be a Input layer or list of Input layers, or None.
            For static graph, if labels is required in loss_function,
            labels must be set. Otherwise, it could be None.
        device (str|fluid.CUDAPlace|fluid.CPUPlace|None): Specify device
            type, 'CPU', 'GPU', fluid.CUDAPlace or fluid.CPUPlace.
            If None, automatically select device according to
            installation package version.

    Returns:
        None

    Raises:
        ValueError: if `device` is none of the accepted values.
        TypeError: if `loss_function` is not a `Loss`, or `inputs` has an
            unsupported type in static graph mode.
        AssertionError: if an entry of `metrics` is not a `Metric`.
    """
    global _parallel_context_initialized

    wants_gpu = isinstance(device, fluid.CUDAPlace) or \
        (isinstance(device, six.string_types) and
         device.lower() == 'gpu') or \
        (device is None and fluid.is_compiled_with_cuda())

    if wants_gpu:
        if isinstance(device, fluid.CUDAPlace):
            self._place = device
        elif ParallelEnv().nranks > 1:
            self._place = fluid.CUDAPlace(ParallelEnv().dev_id)
        else:
            self._place = fluid.CUDAPlace(0)

        if ParallelEnv().nranks > 1 and not _parallel_context_initialized:
            if fluid.in_dygraph_mode():
                main_prog_seed = fluid.default_main_program().random_seed
                startup_prog_seed = \
                    fluid.default_startup_program().random_seed
                fluid.disable_dygraph()
                fluid.enable_dygraph(self._place)
                # enable_dygraph would create and switch to a new program,
                # thus also copy seed to the new program
                fluid.default_main_program().random_seed = main_prog_seed
                fluid.default_startup_program().random_seed = \
                    startup_prog_seed
                fluid.dygraph.parallel.prepare_context()
            else:
                prepare_distributed_context(self._place)

        _parallel_context_initialized = True
    elif isinstance(device, fluid.CPUPlace):
        self._place = device
    elif device is None or (isinstance(device, six.string_types) and
                            device.lower() == 'cpu'):
        self._place = fluid.CPUPlace()
    else:
        raise ValueError(
            ("Expected device in ('gpu', 'cpu', fluid.CUDAPlace, "
             "fluid.CPUPlace, None), "
             "but got {}").format(device))

    self._optimizer = optimizer

    if loss_function and not isinstance(loss_function, Loss):
        raise TypeError("'loss_function' must be sub classes of 'Loss'")
    self._loss_function = loss_function

    if not in_dygraph_mode() and \
            not isinstance(inputs, (list, dict, Input)):
        raise TypeError(
            "'inputs' must be list or dict in static graph mode")

    metrics = metrics or []
    for metric in to_list(metrics):
        assert isinstance(metric, Metric), \
            "{} is not sub class of Metric".format(
                metric.__class__.__name__)
    self._metrics = to_list(metrics)

    if isinstance(inputs, dict):
        # Order the inputs like the arguments of `forward`.
        self._inputs = [
            inputs[n] for n in extract_args(self.forward) if n != 'self'
        ]
    else:
        self._inputs = to_list(inputs)
    self._labels = to_list(labels)

    if not in_dygraph_mode():
        self._adapter.prepare()
def fit(
self,
train_data=None,
eval_data=None,
batch_size=1,
epochs=1,
eval_freq=1,
log_freq=10,
save_dir=None,
save_freq=1,
verbose=2,
drop_last=False,
shuffle=True,
num_workers=0,
callbacks=None, ):
"""
Trains the model for a fixed number of epochs. If `eval_data` is set,
evaluation will be done at the end of each epoch.
Args:
train_data (Dataset|DataLoader): An iterable data loader is used for
train. An instance of paddle paddle.io.Dataset or
paddle.io.Dataloader is recomended. Default: None.
eval_data (Dataset|DataLoader): An iterable data loader is used for
evaluation at the end of epoch. If None, will not do evaluation.
An instance of paddle.io.Dataset or paddle.io.Dataloader
is recomended. Default: None.
batch_size (int): Integer number. The batch size of train_data
and eval_data. When train_data and eval_data are both the
instance of Dataloader, this parameter will be ignored.
Default: 1.
epochs (int): Integer number. The number of epochs to train
the model. Default: 1.
eval_freq (int): The frequency, in number of epochs, an evalutation
is performed. Default: 1.
log_freq (int): The frequency, in number of steps, the training logs
are printed. Default: 10.
save_dir(str|None): The directory to save checkpoint during training.
If None, will not save checkpoint. Default: None.
save_freq (int): The frequency, in number of epochs, to save
checkpoint. Default: 1.
verbose (int): The verbosity mode, should be 0, 1, or 2. 0 = silent,
1 = progress bar, 2 = one line per epoch. Default: 2.
drop_last (bool): Whether drop the last incomplete batch of
train_data when dataset size is not divisible by the batch size.
When train_data is an instance of Dataloader, this parameter
will be ignored. Default: False.
shuffle (bool): Whther to shuffle train_data. When train_data is
an instance of Dataloader, this parameter will be ignored.
Default: True.
num_workers (int): The number of subprocess to load data, 0 for no
subprocess used and loading data in main process.
When train_data and eval_data are both the instance of
Dataloader, this parameter will be ignored. Default: 0.
callbacks (Callback|None): A list of `Callback` instances to apply
during training. If None, `ProgBarLogger` and `ModelCheckpoint`
are automatically inserted. Default: None.
Returns:
None
Examples:
1. An example use Dataset and set btch size, shuffle in fit.
How to make a batch is done internally.
.. code-block:: python
from paddle.incubate.hapi.model import Model, Input, set_device
from paddle.incubate.hapi.loss import CrossEntropy
from paddle.incubate.hapi.metrics import Accuracy
from paddle.incubate.hapi.datasets import MNIST
from paddle.incubate.hapi.vision.models import LeNet
dynamic = True
device = set_device(FLAGS.device)
fluid.enable_dygraph(device) if dynamic else None
train_dataset = MNIST(mode='train')
val_dataset = MNIST(mode='test')
inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
labels = [Input([None, 1], 'int64', name='label')]
model = LeNet()
optim = fluid.optimizer.Adam(
learning_rate=0.001, parameter_list=model.parameters())
model.prepare(
optim,
CrossEntropy(),
Accuracy(topk=(1, 2)),
inputs=inputs,
labels=labels,
device=device)
model.fit(train_dataset,
val_dataset,
epochs=2,
batch_size=64,
save_dir='mnist_checkpoint')
2. An example use DataLoader, batch size and shuffle is set in
DataLoader.
.. code-block:: python
from paddle.incubate.hapi.model import Model, Input, set_device
from paddle.incubate.hapi.loss import CrossEntropy
from paddle.incubate.hapi.metrics import Accuracy
from paddle.incubate.hapi.datasets import MNIST
from paddle.incubate.hapi.vision.models import LeNet
dynamic = True
device = set_device(FLAGS.device)
fluid.enable_dygraph(device) if dynamic else None
train_dataset = MNIST(mode='train')
train_loader = fluid.io.DataLoader(train_dataset,
places=device, batch_size=64)
val_dataset = MNIST(mode='test')
val_loader = fluid.io.DataLoader(val_dataset,
places=device, batch_size=64)
inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
labels = [Input([None, 1], 'int64', name='label')]
model = LeNet()
optim = fluid.optimizer.Adam(
learning_rate=0.001, parameter_list=model.parameters())
model.prepare(
optim,
CrossEntropy(),
Accuracy(topk=(1, 2)),
inputs=inputs,
labels=labels,
device=device)
model.fit(train_loader,
val_loader,
epochs=2,
save_dir='mnist_checkpoint')
"""
assert train_data is not None, \
"train_data must be given!"
if isinstance(train_data, Dataset):
train_sampler = DistributedBatchSampler(
train_data,
batch_size=batch_size,
shuffle=shuffle,
drop_last=drop_last)
train_loader = DataLoader(
train_data,
batch_sampler=train_sampler,
places=self._place,
num_workers=num_workers,
return_list=True)
else:
train_loader = train_data
if eval_data is not None and isinstance(eval_data, Dataset):
eval_sampler = DistributedBatchSampler(
eval_data, batch_size=batch_size)
eval_loader = DataLoader(
eval_data,
batch_sampler=eval_sampler,
places=self._place,
num_workers=num_workers,
return_list=True)
elif eval_data is not None:
eval_loader = eval_data
else:
eval_loader = None
do_eval = eval_loader is not None
self._test_dataloader = eval_loader
steps = self._len_data_loader(train_loader)
cbks = config_callbacks(
callbacks,
model=self,
epochs=epochs,
steps=steps,
log_freq=log_freq,
save_freq=save_freq,
save_dir=save_dir,
verbose=verbose,
metrics=self._metrics_name(), )
cbks.on_begin('train')
for epoch in range(epochs):
cbks.on_epoch_begin(epoch)
logs = self._run_one_epoch(train_loader, cbks, 'train')
cbks.on_epoch_end(epoch, logs)
if do_eval and epoch % eval_freq == 0:
eval_steps = self._len_data_loader(eval_loader)
cbks.on_begin('eval', {
'steps': eval_steps,
'metrics': self._metrics_name()
})
eval_logs = self._run_one_epoch(eval_loader, cbks, 'eval')
cbks.on_end('eval', eval_logs)
cbks.on_end('train', logs)
self._test_dataloader = None
def evaluate(
        self,
        eval_data,
        batch_size=1,
        log_freq=10,
        verbose=2,
        num_workers=0,
        callbacks=None, ):
    """
    Run a single evaluation pass and collect the loss and metric values.

    Args:
        eval_data (Dataset|DataLoader): Data to evaluate on. A `Dataset`
            instance is wrapped in a `DistributedBatchSampler` and a
            `DataLoader` here; any other object is iterated as given.
            An instance of paddle.io.Dataset or paddle.io.Dataloader
            is recommended.
        batch_size (int): Batch size used when `eval_data` is a
            `Dataset`; ignored otherwise. Default: 1.
        log_freq (int): The frequency, in number of steps, at which the
            eval logs are printed (forwarded to the callbacks).
            Default: 10.
        verbose (int): The verbosity mode, should be 0, 1, or 2.
            0 = silent, 1 = progress bar, 2 = one line per epoch.
            Default: 2.
        num_workers (int): Number of data-loading subprocesses used when
            `eval_data` is a `Dataset`; 0 loads data in the main
            process. Ignored otherwise. Default: 0.
        callbacks (Callback|None): `Callback` instances forwarded to
            `config_callbacks`. Presumably default callbacks are
            inserted there when this is None. Default: None.

    Returns:
        dict: One entry per name returned by `self._metrics_name()`
            ('loss' first, then the metric names), taken from the logs
            of the evaluation pass.

    Examples:
        .. code-block:: python

            # declarative mode
            import numpy as np
            from paddle.incubate.hapi.metrics import Accuracy
            from paddle.incubate.hapi.datasets import MNIST
            from paddle.incubate.hapi.vision.transforms import Compose,Resize
            from paddle.incubate.hapi.vision.models import LeNet
            from paddle.incubate.hapi.model import Input, set_device

            inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
            labels = [Input([None, 1], 'int64', name='label')]
            val_dataset = MNIST(mode='test')
            model = LeNet()
            model.prepare(metrics=Accuracy(), inputs=inputs, labels=labels)
            result = model.evaluate(val_dataset, batch_size=64)
            print(result)

            # imperative mode
            import paddle.fluid.dygraph as dg
            place = set_device('cpu')
            with dg.guard(place) as g:
                model = LeNet()
                model.prepare(metrics=Accuracy(), inputs=inputs, labels=labels)
                result = model.evaluate(val_dataset, batch_size=64)
                print(result)
    """
    if isinstance(eval_data, Dataset):
        # A raw dataset still has to be batched; build the loader here.
        sampler = DistributedBatchSampler(eval_data, batch_size=batch_size)
        loader = DataLoader(
            eval_data,
            batch_sampler=sampler,
            places=self._place,
            num_workers=num_workers,
            return_list=True)
    else:
        # Already a loader (or None): use it untouched.
        loader = eval_data

    # Exposed on the instance for the duration of the run only.
    self._test_dataloader = loader

    cbks = config_callbacks(
        callbacks,
        model=self,
        log_freq=log_freq,
        verbose=verbose,
        metrics=self._metrics_name(), )

    begin_params = {
        'steps': self._len_data_loader(loader),
        'metrics': self._metrics_name(),
    }
    cbks.on_begin('eval', begin_params)
    logs = self._run_one_epoch(loader, cbks, 'eval')
    cbks.on_end('eval', logs)

    self._test_dataloader = None

    return {name: logs[name] for name in self._metrics_name()}
def predict(self,
            test_data,
            batch_size=1,
            num_workers=0,
            stack_outputs=False,
            callbacks=None):
    """
    Run the model over `test_data` and return its outputs.

    Args:
        test_data (Dataset|DataLoader): Data to predict on. A `Dataset`
            instance is wrapped in a `DistributedBatchSampler` and a
            `DataLoader` here; any other object is iterated as given.
            An instance of paddle.io.Dataset or paddle.io.Dataloader
            is recommended.
        batch_size (int): Batch size used when `test_data` is a
            `Dataset`; ignored otherwise. Default: 1.
        num_workers (int): Number of data-loading subprocesses used when
            `test_data` is a `Dataset`; 0 loads data in the main
            process. Ignored otherwise. Default: 0.
        stack_outputs (bool): When True, the per-batch results of every
            output field are merged with `np.vstack`, so a field whose
            samples have shape [X, Y] comes back as one [N, X, Y] array
            for N samples. When False, every output field is a tuple
            holding one entry per batch. Leave it False for LoDTensor
            outputs, because stacking may lose their detail info; True
            is recommended when no output is a LoDTensor.
            Default: False.
        callbacks (Callback|None): `Callback` instances forwarded to
            `config_callbacks`. Default: None.

    Returns:
        list: One element per model output field.

    Examples:
        .. code-block:: python

            # declarative mode
            import numpy as np
            from paddle.incubate.hapi.metrics import Accuracy
            from paddle.incubate.hapi.datasets import MNIST
            from paddle.incubate.hapi.vision.transforms import Compose,Resize
            from paddle.incubate.hapi.vision.models import LeNet
            from paddle.incubate.hapi.model import Input, set_device

            class MnistDataset(MNIST):
                def __init__(self, mode, return_label=True):
                    super(MnistDataset, self).__init__(mode=mode)
                    self.return_label = return_label

                def __getitem__(self, idx):
                    img = np.reshape(self.images[idx], [1, 28, 28])
                    if self.return_label:
                        return img, np.array(self.labels[idx]).astype('int64')
                    return img,

                def __len__(self):
                    return len(self.images)

            inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
            test_dataset = MnistDataset(mode='test', return_label=False)
            model = LeNet()
            model.prepare(inputs=inputs)
            result = model.predict(test_dataset, batch_size=64)
            print(result)

            # imperative mode
            import paddle.fluid.dygraph as dg
            place = set_device('cpu')
            with dg.guard(place) as g:
                model = LeNet()
                model.prepare(inputs=inputs)
                result = model.predict(test_dataset, batch_size=64)
                print(result)
    """
    if isinstance(test_data, Dataset):
        # A raw dataset still has to be batched; build the loader here.
        sampler = DistributedBatchSampler(test_data, batch_size=batch_size)
        loader = DataLoader(
            test_data,
            batch_sampler=sampler,
            places=self._place,
            num_workers=num_workers,
            return_list=True)
    else:
        loader = test_data

    # Exposed on the instance for the duration of the run only.
    self._test_dataloader = loader

    cbks = config_callbacks(callbacks, model=self, verbose=1)
    cbks.on_begin('test', {'steps': self._len_data_loader(loader)})

    logs, per_batch = self._run_one_epoch(loader, cbks, 'test')

    # Transpose "one entry per batch" into "one entry per output field".
    per_field = list(zip(*per_batch))

    # NOTE: for lod tensor output, we should not stack outputs
    # for stacking may lose its detail info
    if stack_outputs:
        per_field = [np.vstack(chunks) for chunks in per_field]

    self._test_dataloader = None
    cbks.on_end('test', logs)
    return per_field
def save_inference_model(self,
                         save_dir,
                         model_filename=None,
                         params_filename=None,
                         model_only=False):
    """
    Export the 'test' program as an inference model (static mode only).

    Args:
        save_dir (str): The directory path to save the inference model.
        model_filename (str|None): The name of the file holding the
            inference model itself. If None, a default filename
            :code:`__model__` will be used.
        params_filename (str|None): The name of the file holding all
            related parameters. If None, parameters will be saved in
            separate files.
        model_only (bool): If True, only the inference model is saved
            and the parameters are not. Default: False.

    Returns:
        list: The fetch variables' name list.

    Raises:
        AssertionError: when called in dygraph mode, or when no 'test'
            program exists yet (i.e. `prepare()` has not been called).
    """
    assert not fluid.in_dygraph_mode(
    ), 'Save inference model must in static mode!'

    adapter = self._adapter
    test_prog = adapter._progs.get('test', None)
    assert test_prog, \
        "Model is not ready, please call `model.prepare()` first"

    # Work on a for_test clone so the stored program is left untouched.
    infer_prog = test_prog.clone(for_test=True)
    feed_names = [var.name for var in adapter._input_vars['test']]
    fetch_targets = adapter._endpoints['test']['output']

    return fluid.io.save_inference_model(
        save_dir,
        feed_names,
        fetch_targets,
        adapter._executor,
        main_program=infer_prog,
        model_filename=model_filename,
        params_filename=params_filename,
        program_only=model_only)
def _run_one_epoch(self, data_loader, callbacks, mode, logs={}):
outputs = []
for step, data in enumerate(data_loader):
# data might come from different types of data_loader and have
# different format, as following:
# 1. DataLoader in static graph:
# [[input1, input2, ..., label1, lable2, ...]]
# 2. DataLoader in dygraph
# [input1, input2, ..., label1, lable2, ...]
# 3. custumed iterator yield concated inputs and labels:
# [input1, input2, ..., label1, lable2, ...]
# 4. custumed iterator yield seperated inputs and labels:
# ([input1, input2, ...], [label1, lable2, ...])
# To handle all of these, flatten (nested) list to list.
data = flatten(data)
# LoDTensor.shape is callable, where LoDTensor comes from
# DataLoader in static graph
batch_size = data[0].shape()[0] if callable(data[
0].shape) else data[0].shape[0]
callbacks.on_batch_begin(mode, step, logs)
if mode != 'test':
outs = getattr(self, mode + '_batch')(data[:len(self._inputs)],
data[len(self._inputs):])
# losses
loss = outs[0] if self._metrics else outs
metrics = [[l[0] for l in loss]]
# metrics
for metric in self._metrics:
res = metric.accumulate()
metrics.extend(to_list(res))
assert len(self._metrics_name()) == len(metrics)
for k, v in zip(self._metrics_name(), metrics):
logs[k] = v
else:
outs = getattr(self, mode + '_batch')(data)
outputs.append(outs)
logs['step'] = step
if mode == 'train' or self._adapter._merge_count.get(
mode + '_batch', 0) <= 0:
logs['batch_size'] = batch_size * ParallelEnv().nranks
else:
logs['batch_size'] = self._adapter._merge_count[mode + '_batch']
callbacks.on_batch_end(mode, step, logs)
self._reset_metrics()
if mode == 'test':
return logs, outputs
return logs
def _reset_metrics(self):
for metric in self._metrics:
metric.reset()
def _metrics_name(self):
    """Return 'loss' followed by the name(s) reported by every metric.

    Each metric's `name()` result is normalized with `to_list`, so one
    metric may contribute several names.
    """
    names = ['loss']
    for metric in self._metrics:
        names += to_list(metric.name())
    return names
def _len_data_loader(self, data_loader):
try:
steps = len(data_loader)
except Exception:
steps = None
return steps
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import time
import numpy as np
from collections import namedtuple
__all__ = ['ProgressBar']
class ProgressBar(object):
    """Print per-step progress information to the console.

    With ``verbose == 1`` an in-place bar such as
    ``step  3/10 [==>.......]`` is drawn together with the reported
    values, an ETA and the time per step. With ``verbose == 2`` one
    plain line is printed per update. Any other value makes
    :meth:`update` print nothing.

    Args:
        num (int|None): Total number of steps, or None when unknown.
        width (int): Requested bar width in characters; capped by
            :meth:`_get_max_width`.
        verbose (int): Output style, see above.
        start (bool): Whether to start the timer on construction. When
            False, :meth:`start` must be called before :meth:`update`.
        file: Stream that :meth:`start` flushes and that is probed with
            ``isatty``.
            NOTE(review): :meth:`update` writes to ``sys.stdout``
            regardless of this argument - confirm whether intended.

    Raises:
        TypeError: If ``num`` is an int that is not positive.
    """

    def __init__(self,
                 num=None,
                 width=30,
                 verbose=1,
                 start=True,
                 file=sys.stdout):
        self._num = num
        if isinstance(num, int) and num <= 0:
            raise TypeError('num should be None or integer (> 0)')
        max_width = self._get_max_width()
        self._width = width if width <= max_width else max_width
        self._total_width = 0
        self._verbose = verbose
        self.file = file
        self._values = {}
        self._values_order = []
        if start:
            self._start = time.time()
        self._last_update = 0
        # In-place redrawing is used when any of these environment
        # checks succeeds; otherwise every update starts a new line.
        self._dynamic_display = (
            (hasattr(self.file, 'isatty') and
             self.file.isatty()) or 'ipykernel' in sys.modules or
            'posix' in sys.modules or 'PYCHARM_HOSTED' in os.environ)

    def _get_max_width(self):
        """Return the widest bar allowed for the current terminal size."""
        if sys.version_info > (3, 3):
            from shutil import get_terminal_size
        else:
            try:
                from backports.shutil_get_terminal_size import get_terminal_size
            except ImportError:
                # Without the backport, assume an 80x24 terminal.
                def get_terminal_size():
                    terminal_size = namedtuple("terminal_size", "columns lines")
                    return terminal_size(80, 24)

        terminal_width, _ = get_terminal_size()
        max_width = min(int(terminal_width * 0.6), terminal_width - 50)
        return max_width

    @staticmethod
    def _format_float(v):
        """Format a float with 4 decimals; scientific form if |v| <= 1e-3."""
        if abs(v) > 1e-3:
            return ' %.4f' % v
        return ' %.4e' % v

    def start(self):
        """Flush the output stream and (re)start the timer."""
        self.file.flush()
        self._start = time.time()

    def update(self, current_num, values=None):
        """Print the progress for step ``current_num``.

        Args:
            current_num (int): Number of steps finished so far.
            values (iterable|None): ``(name, value)`` pairs to print,
                where ``value`` is a scalar or a list of scalars.
                None means there is nothing to report.
        """
        # The default used to be iterated directly, so update(n) raised
        # "TypeError: 'NoneType' object is not iterable".
        if values is None:
            values = []

        now = time.time()

        if current_num:
            time_per_unit = (now - self._start) / current_num
        else:
            time_per_unit = 0

        # Pick the unit that keeps the printed number readable.
        if time_per_unit >= 1 or time_per_unit == 0:
            fps = ' - %.0fs/%s' % (time_per_unit, 'step')
        elif time_per_unit >= 1e-3:
            fps = ' - %.0fms/%s' % (time_per_unit * 1e3, 'step')
        else:
            fps = ' - %.0fus/%s' % (time_per_unit * 1e6, 'step')

        info = ''
        if self._verbose == 1:
            prev_total_width = self._total_width

            if self._dynamic_display:
                # Move the cursor back to overwrite the previous bar.
                sys.stdout.write('\b' * prev_total_width)
                sys.stdout.write('\r')
            else:
                sys.stdout.write('\n')

            if self._num is not None:
                numdigits = int(np.log10(self._num)) + 1

                bar_chars = ('step %' + str(numdigits) + 'd/%d [') % (
                    current_num, self._num)
                prog = float(current_num) / self._num
                prog_width = int(self._width * prog)

                if prog_width > 0:
                    bar_chars += ('=' * (prog_width - 1))
                    if current_num < self._num:
                        bar_chars += '>'
                    else:
                        bar_chars += '='
                bar_chars += ('.' * (self._width - prog_width))
                bar_chars += ']'
            else:
                bar_chars = 'step %3d' % current_num

            self._total_width = len(bar_chars)
            sys.stdout.write(bar_chars)

            for k, val in values:
                info += ' - %s:' % k
                val = val if isinstance(val, list) else [val]
                for v in val:
                    if isinstance(v, (float, np.float32, np.float64)):
                        info += self._format_float(v)
                    else:
                        info += ' %s' % v

            if self._num is not None and current_num < self._num:
                eta = time_per_unit * (self._num - current_num)
                if eta > 3600:
                    eta_format = '%d:%02d:%02d' % (eta // 3600,
                                                   (eta % 3600) // 60, eta % 60)
                elif eta > 60:
                    eta_format = '%d:%02d' % (eta // 60, eta % 60)
                else:
                    eta_format = '%ds' % eta

                info += ' - ETA: %s' % eta_format

            info += fps
            self._total_width += len(info)
            # Pad with blanks so leftovers of a longer previous line
            # are wiped.
            if prev_total_width > self._total_width:
                info += (' ' * (prev_total_width - self._total_width))

            # newline for another epoch
            if self._num is not None and current_num >= self._num:
                info += '\n'
            if self._num is None:
                info += '\n'

            sys.stdout.write(info)
            sys.stdout.flush()
            self._last_update = now
        elif self._verbose == 2:
            if self._num:
                numdigits = int(np.log10(self._num)) + 1
                count = ('step %' + str(numdigits) + 'd/%d') % (current_num,
                                                                self._num)
            else:
                count = 'step %3d' % current_num
            info = count + info

            for k, val in values:
                info += ' - %s:' % k
                val = val if isinstance(val, list) else [val]
                for v in val:
                    if isinstance(v, (float, np.float32, np.float64)):
                        info += self._format_float(v)
                    elif isinstance(v, np.ndarray) and \
                        v.size == 1 and \
                        v.dtype in [np.float32, np.float64]:
                        # One-element float arrays print like scalars.
                        info += self._format_float(v[0])
                    else:
                        info += ' %s' % v

            info += fps
            info += '\n'
            sys.stdout.write(info)
            sys.stdout.flush()
# Collect every test_*.py in this directory; dropping ".py" turns the file
# names into test names.
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# test_dist_*.py files are collected separately so they can be registered
# through py_dist_test instead of py_test.
file(GLOB DIST_TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_dist_*.py")
string(REPLACE ".py" "" DIST_TEST_OPS "${DIST_TEST_OPS}")
# test_dist_* also matches test_*, so remove those entries from TEST_OPS.
# One item is removed per iteration, which keeps this valid when
# DIST_TEST_OPS is empty.
foreach(TEST_OP ${DIST_TEST_OPS})
list(REMOVE_ITEM TEST_OPS ${TEST_OP})
endforeach()
# Register each remaining test through py_test.
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
# py_dist_test(<name> SRCS <script> [ARGS ...] [ENVS ...] [DEPS ...])
#
# Registers <name> as a CTest test that runs <script> with the build tree's
# python package on PYTHONPATH. The test is only added when WITH_TESTING,
# WITH_COVERAGE, WITH_GPU and WITH_NCCL are all enabled and the platform is
# not Windows. DEPS is parsed but not used here.
function(py_dist_test TARGET_NAME)
  if(NOT WITH_TESTING)
    return()
  endif()

  # No option or single-value keywords; only multi-value ones.
  cmake_parse_arguments(py_dist_test "" "" "SRCS;DEPS;ARGS;ENVS" ${ARGN})

  if(WITH_COVERAGE AND WITH_GPU AND WITH_NCCL AND NOT WIN32)
    add_test(NAME ${TARGET_NAME}
             COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
             FLAGS_cpu_deterministic=true NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1
             PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_dist_test_ENVS}
             COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data
             ${PYTHON_EXECUTABLE} -u ${py_dist_test_SRCS} ${py_dist_test_ARGS}
             WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
    # No unit test should exceed 10 minutes.
    set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=DIST" RUN_SERIAL TRUE)
  endif()
endfunction()
# Register every test_dist_* script through py_dist_test, printing each
# test name at configure time.
foreach(src ${DIST_TEST_OPS})
message(STATUS ${src})
py_dist_test(${src} SRCS ${src}.py)
endforeach()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import unittest
import numpy as np
import contextlib
from paddle import fluid
from paddle.incubate.hapi.model import Model, Input, set_device
from paddle.incubate.hapi.loss import CrossEntropy
from paddle.incubate.hapi.vision.models import LeNet
from paddle.incubate.hapi.metrics import Accuracy
from paddle.incubate.hapi.callbacks import ProgBarLogger
from paddle.incubate.hapi.datasets import MNIST
class MnistDataset(MNIST):
    """MNIST wrapper yielding [1, 28, 28] images, optionally without labels.

    Args:
        mode (str): Forwarded to `MNIST` as `mode`.
        return_label (bool): When False, samples are 1-tuples holding
            only the image. Default: True.
    """

    def __init__(self, mode, return_label=True):
        super(MnistDataset, self).__init__(mode=mode)
        self.return_label = return_label

    def __getitem__(self, idx):
        image = np.reshape(self.images[idx], [1, 28, 28])
        if not self.return_label:
            # Still a tuple, so callers can unpack samples uniformly.
            return image,
        return image, np.array(self.labels[idx]).astype('int64')

    def __len__(self):
        return len(self.images)
def compute_accuracy(pred, gt):
    """Return the fraction of samples whose argmax prediction equals the label.

    Args:
        pred: Array-like of per-class scores; the predicted class is the
            argmax over the last axis.
        gt: Labels, one per sample, either flat (N,) or as a column (N, 1).

    Returns:
        Number of matching samples divided by the number of samples.
    """
    pred = np.argmax(pred, -1)
    # Force the labels into a column. A flat (N,) vector would otherwise
    # broadcast against the (N, 1) predictions into an N x N comparison
    # and yield a meaningless ratio.
    gt = np.array(gt).reshape(-1, 1)
    correct = pred[:, np.newaxis] == gt
    return np.sum(correct) / correct.shape[0]
@unittest.skipIf(not fluid.is_compiled_with_cuda(),
                 'CPU testing is not supported')
class TestDistTraning(unittest.TestCase):
    """Fit, evaluate and predict LeNet on MNIST on GPU with dygraph enabled."""

    def test_static_multiple_gpus(self):
        # NOTE(review): the method name says "static", but this variant
        # switches dygraph on right after picking the device.
        place = set_device('gpu')
        fluid.enable_dygraph(place)

        bs = 128
        image_spec = [Input((-1, 1, 28, 28), 'float32', name='image')]
        label_spec = [Input([None, 1], 'int64', name='label')]

        train_set = MnistDataset(mode='train')
        val_set = MnistDataset(mode='test')
        # Same split as val_set but without labels, for predict().
        predict_set = MnistDataset(mode='test', return_label=False)

        net = LeNet()
        momentum_opt = fluid.optimizer.Momentum(
            learning_rate=0.001, momentum=.9, parameter_list=net.parameters())
        net.prepare(
            momentum_opt,
            CrossEntropy(),
            Accuracy(),
            image_spec,
            label_spec,
            device=place)

        net.fit(train_set,
                val_set,
                epochs=2,
                batch_size=bs,
                callbacks=ProgBarLogger(50))

        eval_result = net.evaluate(val_set, batch_size=bs)
        predictions = net.predict(
            predict_set, batch_size=bs, stack_outputs=True)

        # One prediction row per test sample.
        np.testing.assert_equal(predictions[0].shape[0], len(predict_set))

        # Accuracy recomputed from predictions must match evaluate().
        accuracy = compute_accuracy(predictions[0], val_set.labels)
        np.testing.assert_allclose(accuracy, eval_result['acc'])
# Run the tests in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import unittest
import numpy as np
import contextlib
from paddle import fluid
from paddle.incubate.hapi.model import Model, Input, set_device
from paddle.incubate.hapi.loss import CrossEntropy
from paddle.incubate.hapi.vision.models import LeNet
from paddle.incubate.hapi.metrics import Accuracy
from paddle.incubate.hapi.callbacks import ProgBarLogger
from paddle.incubate.hapi.datasets import MNIST
class MnistDataset(MNIST):
    """MNIST wrapper yielding [1, 28, 28] images, optionally without labels.

    Args:
        mode (str): Forwarded to `MNIST` as `mode`.
        return_label (bool): When False, samples are 1-tuples holding
            only the image. Default: True.
    """

    def __init__(self, mode, return_label=True):
        super(MnistDataset, self).__init__(mode=mode)
        self.return_label = return_label

    def __getitem__(self, idx):
        image = np.reshape(self.images[idx], [1, 28, 28])
        if not self.return_label:
            # Still a tuple, so callers can unpack samples uniformly.
            return image,
        return image, np.array(self.labels[idx]).astype('int64')

    def __len__(self):
        return len(self.images)
def compute_accuracy(pred, gt):
    """Return the fraction of samples whose argmax prediction equals the label.

    Args:
        pred: Array-like of per-class scores; the predicted class is the
            argmax over the last axis.
        gt: Labels, one per sample, either flat (N,) or as a column (N, 1).

    Returns:
        Number of matching samples divided by the number of samples.
    """
    pred = np.argmax(pred, -1)
    # Force the labels into a column. A flat (N,) vector would otherwise
    # broadcast against the (N, 1) predictions into an N x N comparison
    # and yield a meaningless ratio.
    gt = np.array(gt).reshape(-1, 1)
    correct = pred[:, np.newaxis] == gt
    return np.sum(correct) / correct.shape[0]
@unittest.skipIf(not fluid.is_compiled_with_cuda(),
                 'CPU testing is not supported')
class TestDistTraning(unittest.TestCase):
    """Fit, evaluate and predict LeNet on MNIST on GPU without enabling dygraph."""

    def test_static_multiple_gpus(self):
        place = set_device('gpu')

        bs = 128
        image_spec = [Input((-1, 1, 28, 28), 'float32', name='image')]
        label_spec = [Input([None, 1], 'int64', name='label')]

        train_set = MnistDataset(mode='train')
        val_set = MnistDataset(mode='test')
        # Same split as val_set but without labels, for predict().
        predict_set = MnistDataset(mode='test', return_label=False)

        net = LeNet()
        momentum_opt = fluid.optimizer.Momentum(
            learning_rate=0.001, momentum=.9, parameter_list=net.parameters())
        net.prepare(
            momentum_opt,
            CrossEntropy(),
            Accuracy(),
            image_spec,
            label_spec,
            device=place)

        net.fit(train_set,
                val_set,
                epochs=2,
                batch_size=bs,
                callbacks=ProgBarLogger(50))

        eval_result = net.evaluate(val_set, batch_size=bs)
        predictions = net.predict(
            predict_set, batch_size=bs, stack_outputs=True)

        # One prediction row per test sample.
        np.testing.assert_equal(predictions[0].shape[0], len(predict_set))

        # Accuracy recomputed from predictions must match evaluate().
        accuracy = compute_accuracy(predictions[0], val_set.labels)
        np.testing.assert_allclose(accuracy, eval_result['acc'])
# Run the tests in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import time
import random
import tempfile
import shutil
from paddle.incubate.hapi.model import Input
from paddle.incubate.hapi.vision.models import LeNet
from paddle.incubate.hapi.callbacks import config_callbacks
class TestCallbacks(unittest.TestCase):
    """Drive the configured callbacks through synthetic train/eval/test loops."""

    def setUp(self):
        # Checkpoint directory handed to config_callbacks.
        self.save_dir = tempfile.mkdtemp()

    def tearDown(self):
        shutil.rmtree(self.save_dir)

    def run_callback(self):
        """Simulate two epochs using the verbosity stored in `self.verbose`."""
        num_epochs = 2
        train_steps = 50
        log_every = 2
        eval_steps = 20

        net = LeNet()
        net.prepare(inputs=[Input([None, 1, 28, 28], 'float32', name='image')])

        cbks = config_callbacks(
            model=net,
            batch_size=128,
            epochs=num_epochs,
            steps=train_steps,
            log_freq=log_every,
            verbose=self.verbose,
            metrics=['loss', 'acc'],
            save_dir=self.save_dir)
        cbks.on_begin('train')

        train_logs = {'loss': 50.341673, 'acc': 0.00256}
        for epoch in range(num_epochs):
            # -- fake training epoch --
            cbks.on_epoch_begin(epoch)
            for step in range(train_steps):
                cbks.on_batch_begin('train', step, train_logs)
                train_logs['loss'] -= random.random() * 0.1
                train_logs['acc'] += random.random() * 0.1
                time.sleep(0.005)
                cbks.on_batch_end('train', step, train_logs)
            cbks.on_epoch_end(epoch, train_logs)

            # -- fake evaluation pass --
            eval_logs = {'eval_loss': 20.341673, 'eval_acc': 0.256}
            cbks.on_begin('eval', {
                'steps': eval_steps,
                'metrics': ['eval_loss', 'eval_acc'],
            })
            for step in range(eval_steps):
                cbks.on_batch_begin('eval', step, eval_logs)
                eval_logs['eval_loss'] -= random.random() * 0.1
                eval_logs['eval_acc'] += random.random() * 0.1
                eval_logs['batch_size'] = 2
                time.sleep(0.005)
                cbks.on_batch_end('eval', step, eval_logs)
            cbks.on_end('eval', eval_logs)

            # -- fake prediction pass --
            test_logs = {}
            cbks.on_begin('test', {'steps': eval_steps})
            for step in range(eval_steps):
                cbks.on_batch_begin('test', step, test_logs)
                test_logs['batch_size'] = 2
                time.sleep(0.005)
                cbks.on_batch_end('test', step, test_logs)
            cbks.on_end('test', test_logs)

        cbks.on_end('train')

    def test_callback_verbose_0(self):
        self.verbose = 0
        self.run_callback()

    def test_callback_verbose_1(self):
        self.verbose = 1
        self.run_callback()

    def test_callback_verbose_2(self):
        self.verbose = 2
        self.run_callback()
# Run the tests in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import os
import numpy as np
import tempfile
import shutil
import cv2
from paddle.incubate.hapi.datasets import *
from paddle.incubate.hapi.datasets.utils import _check_exists_and_download
class TestFolderDatasets(unittest.TestCase):
    """Checks for DatasetFolder / ImageFolder on a generated image tree."""

    def setUp(self):
        # data_dir holds two class folders with two random 32x32 JPEGs
        # each; empty_dir stays empty for the error-path checks.
        self.data_dir = tempfile.mkdtemp()
        self.empty_dir = tempfile.mkdtemp()
        for i in range(2):
            sub_dir = os.path.join(self.data_dir, 'class_' + str(i))
            if not os.path.exists(sub_dir):
                os.makedirs(sub_dir)
            for j in range(2):
                fake_img = (np.random.random((32, 32, 3)) * 255).astype('uint8')
                cv2.imwrite(os.path.join(sub_dir, str(j) + '.jpg'), fake_img)

    def tearDown(self):
        # Remove both directories created in setUp; empty_dir used to be
        # left behind after every test.
        shutil.rmtree(self.data_dir)
        shutil.rmtree(self.empty_dir)

    def test_dataset(self):
        dataset_folder = DatasetFolder(self.data_dir)

        for _ in dataset_folder:
            pass

        self.assertEqual(len(dataset_folder), 4)
        self.assertEqual(len(dataset_folder.classes), 2)

        # A second instance over the same directory must iterate as well.
        dataset_folder = DatasetFolder(self.data_dir)
        for _ in dataset_folder:
            pass

    def test_folder(self):
        loader = ImageFolder(self.data_dir)

        for _ in loader:
            pass

        loader = ImageFolder(self.data_dir)
        for _ in loader:
            pass

        self.assertEqual(len(loader), 4)

    def test_transform(self):
        def fake_transform(img):
            return img

        dataset_folder = DatasetFolder(self.data_dir, transform=fake_transform)

        for _ in dataset_folder:
            pass

        loader = ImageFolder(self.data_dir, transform=fake_transform)
        for _ in loader:
            pass

    def test_errors(self):
        with self.assertRaises(RuntimeError):
            ImageFolder(self.empty_dir)
        with self.assertRaises(RuntimeError):
            DatasetFolder(self.empty_dir)

        with self.assertRaises(ValueError):
            _check_exists_and_download('temp_paddle', None, None, None, False)
class TestMNISTTest(unittest.TestCase):
    """Every sample of the MNIST test split is a [1, 28, 28] image with a digit label."""

    def test_main(self):
        dataset = MNIST(mode='test')
        self.assertEqual(len(dataset), 10000)

        for index in range(len(dataset)):
            image, label = dataset[index]
            self.assertEqual(image.shape[0], 1)
            self.assertEqual(image.shape[1], 28)
            self.assertEqual(image.shape[2], 28)
            self.assertEqual(label.shape[0], 1)
            self.assertTrue(0 <= int(label) <= 9)
class TestMNISTTrain(unittest.TestCase):
    """With chw_format=False, MNIST training samples are flat 784-element images."""

    def test_main(self):
        dataset = MNIST(mode='train', chw_format=False)
        self.assertEqual(len(dataset), 60000)

        for index in range(len(dataset)):
            image, label = dataset[index]
            self.assertEqual(image.shape[0], 784)
            self.assertEqual(label.shape[0], 1)
            self.assertTrue(0 <= int(label) <= 9)
class TestFlowersTrain(unittest.TestCase):
    """Size of the Flowers 'train' split plus a spot check of one sample."""

    def test_main(self):
        expected_size = 6149
        dataset = Flowers(mode='train')
        self.assertEqual(len(dataset), expected_size)

        # traversing the whole dataset may take a long time,
        # so randomly check 1 sample
        index = np.random.randint(0, 6149)
        image, label = dataset[index]
        self.assertEqual(len(image.shape), 3)
        self.assertEqual(image.shape[2], 3)
        self.assertEqual(label.shape[0], 1)
class TestFlowersValid(unittest.TestCase):
    """Size of the Flowers 'valid' split plus a spot check of one sample."""

    def test_main(self):
        expected_size = 1020
        dataset = Flowers(mode='valid')
        self.assertEqual(len(dataset), expected_size)

        # traversing the whole dataset may take a long time,
        # so randomly check 1 sample
        index = np.random.randint(0, 1020)
        image, label = dataset[index]
        self.assertEqual(len(image.shape), 3)
        self.assertEqual(image.shape[2], 3)
        self.assertEqual(label.shape[0], 1)
class TestFlowersTest(unittest.TestCase):
    """Size of the Flowers 'test' split plus a spot check of one sample."""

    def test_main(self):
        expected_size = 1020
        dataset = Flowers(mode='test')
        self.assertEqual(len(dataset), expected_size)

        # traversing the whole dataset may take a long time,
        # so randomly check 1 sample
        index = np.random.randint(0, 1020)
        image, label = dataset[index]
        self.assertEqual(len(image.shape), 3)
        self.assertEqual(image.shape[2], 3)
        self.assertEqual(label.shape[0], 1)
# Run the tests in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import os
import time
import copy
import subprocess
import paddle.fluid as fluid
from paddle.distributed.utils import find_free_ports, watch_local_trainers, get_cluster, TrainerProc
def get_cluster_from_args(selected_gpus):
    """Describe a single-node (127.0.0.1) cluster for the given GPUs.

    Args:
        selected_gpus (list): GPU ids; one free port is requested per entry.

    Returns:
        The value returned by ``get_cluster``; the caller in this file
        unpacks it as ``(cluster, pod)``.
    """
    node_ip = '127.0.0.1'
    node_ips = [node_ip]

    free_ports = find_free_ports(len(selected_gpus))
    if free_ports is not None:
        free_ports = list(free_ports)
    return get_cluster(node_ips, node_ip, free_ports, selected_gpus)
def get_gpus(selected_gpus):
    """Split a comma-separated GPU id string into a list of stripped ids."""
    gpu_ids = []
    for token in selected_gpus.split(','):
        gpu_ids.append(token.strip())
    return gpu_ids
def start_local_trainers(cluster,
                         pod,
                         training_script,
                         training_script_args,
                         log_dir=None):
    """Spawn one Python subprocess per trainer listed in ``pod``.

    Args:
        cluster: object providing ``trainers_nranks()`` and
            ``trainers_endpoints()``.
        pod: object whose ``trainers`` items expose ``gpus``, ``rank`` and
            ``endpoint``.
        training_script (str): path of the script each trainer runs.
        training_script_args (list|None): extra command-line arguments
            appended after the script path.
        log_dir: accepted for interface compatibility; not used here.

    Returns:
        list: one ``TrainerProc`` per started trainer, in ``pod.trainers``
        order.
    """
    current_env = os.environ.copy()
    # Paddle broadcasts the ncclUniqueId over a socket and a proxy may make
    # trainers unreachable. Setting the variables to "" makes grpc log a
    # "bad uri" error, so they are removed instead.
    current_env.pop("http_proxy", None)
    current_env.pop("https_proxy", None)

    if os.getenv('WITH_COVERAGE', 'OFF') == 'ON':
        launcher = ["python", "-m", "coverage", "run", "--branch", "-p"]
    else:
        launcher = ["python", "-u"]
    script_args = [str(arg) for arg in (training_script_args or [])]

    procs = []
    for t in pod.trainers:
        proc_env = {
            "FLAGS_selected_gpus": "%s" % ",".join([str(g) for g in t.gpus]),
            "PADDLE_TRAINER_ID": "%d" % t.rank,
            "PADDLE_CURRENT_ENDPOINT": "%s" % t.endpoint,
            "PADDLE_TRAINERS_NUM": "%d" % cluster.trainers_nranks(),
            "PADDLE_TRAINER_ENDPOINTS": ",".join(cluster.trainers_endpoints())
        }
        current_env.update(proc_env)
        print("trainer proc env:{}".format(current_env))

        # Pass an argv list to Popen so a script path or argument that
        # contains spaces is not split apart.
        argv = launcher + [training_script] + script_args
        cmd = " ".join(argv)
        print("start trainer proc:{} env:{}".format(cmd, proc_env))

        proc = subprocess.Popen(argv, env=current_env)

        tp = TrainerProc()
        tp.proc = proc
        tp.rank = t.rank
        tp.log_fn = None  # no log file is opened for the trainer
        tp.cmd = cmd
        procs.append(tp)
    return procs
class TestMultipleGpus(unittest.TestCase):
    """Run the hapi MNIST scripts with trainers on GPUs 0 and 1."""

    def run_mnist_2gpu(self, target_file_name):
        """Launch ``target_file_name`` on GPUs '0,1' and wait until it ends.

        The test is reported as skipped, rather than silently passing, when
        no CUDA device is available.
        """
        if fluid.core.get_cuda_device_count() == 0:
            self.skipTest('no CUDA device available')

        selected_gpus = get_gpus('0,1')
        cluster, pod = get_cluster_from_args(selected_gpus)

        procs = start_local_trainers(
            cluster,
            pod,
            training_script=target_file_name,
            training_script_args=[])

        # Poll every 3 seconds until watch_local_trainers reports that no
        # trainer is alive any more.
        while True:
            alive = watch_local_trainers(procs, cluster.trainers_nranks())
            if not alive:
                print("Local procs complete, POD info:{}".format(pod))
                break
            time.sleep(3)

    def test_hapi_multiple_gpus_static(self):
        """Run the static-graph MNIST script."""
        self.run_mnist_2gpu('dist_hapi_mnist_static.py')

    def test_hapi_multiple_gpus_dynamic(self):
        """Run the dynamic-graph MNIST script."""
        self.run_mnist_2gpu('dist_hapi_mnist_dynamic.py')
# Run the test cases defined above when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from paddle.incubate.hapi.download import get_weights_path_from_url
class TestDownload(unittest.TestCase):
    """Exercise ``get_weights_path_from_url`` with valid and invalid URLs."""

    def download(self, url, md5sum):
        """Fetch ``url``, passing ``md5sum`` through to the downloader."""
        get_weights_path_from_url(url, md5sum)

    def test_download_model(self):
        """Download a weights file together with its md5 checksum."""
        self.download(
            'https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams',
            '8ff74f291f72533f2a7956a4efff9d88')

    def test_exist_download(self):
        """Request the same file and checksum as ``test_download_model``."""
        # NOTE(review): presumably meant to hit an already-downloaded copy;
        # that depends on test ordering and on the downloader's caching.
        self.download(
            'https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams',
            '8ff74f291f72533f2a7956a4efff9d88')

    def test_download_without_md5sum(self):
        """Download the weights file without supplying a checksum."""
        self.download(
            'https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams',
            None)

    def test_download_errors(self):
        """A bad URL must raise RuntimeError, with or without a checksum."""
        bad_url = 'https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0t.pdparams'
        with self.assertRaises(RuntimeError):
            self.download(bad_url, '8ff74f291f72533f2a7956a4eftttttt')
        with self.assertRaises(RuntimeError):
            self.download(bad_url, None)
# Run the test cases defined above when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import unittest
import os
import numpy as np
import shutil
import tempfile
from paddle.incubate.hapi.logger import setup_logger
class TestSetupLogger(unittest.TestCase):
    """Call ``setup_logger`` with no output, a directory and a file path."""

    def setUp(self):
        """Create a scratch directory and a log-file path inside it."""
        scratch = tempfile.mkdtemp()
        self.save_dir = scratch
        self.save_file = os.path.join(scratch, 'logger.txt')

    def tearDown(self):
        """Delete the scratch directory created by ``setUp``."""
        shutil.rmtree(self.save_dir)

    def logger(self, output=None):
        """Invoke ``setup_logger`` with the given ``output`` target."""
        setup_logger(output=output)

    def test_logger_no_output(self):
        """The logger can be set up without an output target."""
        self.logger(output=None)

    def test_logger_dir(self):
        """The logger can be set up with a directory as output."""
        self.logger(output=self.save_dir)

    def test_logger_file(self):
        """The logger can be set up with a file path as output."""
        self.logger(output=self.save_file)
# Run the test cases defined above when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import unittest
import os
import six
import numpy as np
import shutil
import copy
import paddle
from paddle import fluid
from paddle.incubate.hapi.model import Model, Input
from paddle.incubate.hapi.loss import CrossEntropy, SoftmaxWithCrossEntropy
def stable_softmax(x):
    """Return the softmax of ``x``, computed in a numerically stable way.

    The maximum of ``x`` is subtracted before exponentiation, and the
    shifted values are clipped below at -64 so that a later
    ``log(exp(shifted))`` cannot produce ``log(0) = -INF``.
    """
    shifted = np.clip(x - np.max(x), -64., None)
    exponentials = np.exp(shifted)
    total = np.sum(exponentials)
    return exponentials / total
def randomize_probability(batch_size, class_num, dtype='float32'):
    """Return a random ``(batch_size, class_num)`` array whose rows sum to 1.

    Entries are drawn uniformly from [0.1, 1.0), cast to ``dtype`` and then
    each row is divided by its own sum.
    """
    prob = np.random.uniform(
        0.1, 1.0, size=(batch_size, class_num)).astype(dtype)
    # Normalize all rows at once; keepdims makes the row sums broadcast
    # against the (batch_size, class_num) array.
    prob /= prob.sum(axis=1, keepdims=True)
    return prob
def numpy_ce(x, label):
    """Return the mean cross entropy of ``x`` against integer ``label``.

    Args:
        x: 2-D array of probabilities, one row per sample.
        label: array-like of shape ``(N, 1)`` holding the class index of
            each sample.

    Returns:
        The float32 mean of ``-log(x[i, label[i, 0]])`` over all rows.
    """
    x = np.asarray(x)
    label = np.asarray(label)
    rows = np.arange(x.shape[0])
    # Integer-array indexing picks the probability of the labelled class
    # for every row in one step.
    picked = x[rows, label[:x.shape[0], 0]]
    return (-np.log(picked)).astype("float32").mean()
class TestLoss(unittest.TestCase):
    """Tests for the hapi ``CrossEntropy`` and ``SoftmaxWithCrossEntropy``."""

    def test_cross_entropy(self):
        """``CrossEntropy`` must match the NumPy reference for two heads."""
        class_num = 100
        batch_size = 128
        inputs = [
            randomize_probability(batch_size, class_num) for _ in range(2)
        ]
        labels = [
            np.random.randint(
                0, class_num, (batch_size, 1), dtype="int64") for _ in range(2)
        ]
        gt_out = [numpy_ce(x, label) for x, label in zip(inputs, labels)]

        fluid.enable_dygraph()
        try:
            cross_entropy = CrossEntropy()
            out = cross_entropy(
                [fluid.dygraph.to_variable(x) for x in inputs],
                [fluid.dygraph.to_variable(label) for label in labels])
            out = [o.numpy() for o in out]
        finally:
            # Leave dygraph mode even on failure so it does not leak into
            # tests that run afterwards.
            fluid.disable_dygraph()

        for o, g in zip(out, gt_out):
            np.testing.assert_allclose(o, g, atol=1e-5)

    def test_soft_cross_entronpy(self):
        """Smoke-test ``SoftmaxWithCrossEntropy``; outputs are not checked.

        The method name keeps its original spelling so the test id stays
        unchanged.
        """
        class_num = 100
        batch_size = 128
        inputs = [
            randomize_probability(batch_size, class_num) for _ in range(2)
        ]
        labels = [
            np.random.randint(
                0, class_num, (batch_size, 1), dtype="int64") for _ in range(2)
        ]

        fluid.enable_dygraph()
        try:
            softmax_cross_entropy = SoftmaxWithCrossEntropy()
            softmax_cross_entropy(
                [fluid.dygraph.to_variable(x) for x in inputs],
                [fluid.dygraph.to_variable(label) for label in labels])

            # Second call: average=False, one input and a bare (non-list)
            # label variable.
            softmax_cross_entropy = SoftmaxWithCrossEntropy(average=False)
            inputs = [randomize_probability(batch_size, class_num)]
            labels = [
                np.random.randint(
                    0, class_num, (batch_size, 1), dtype="int64")
            ]
            softmax_cross_entropy(
                [fluid.dygraph.to_variable(x) for x in inputs],
                fluid.dygraph.to_variable(labels[0]))
        finally:
            fluid.disable_dygraph()
# Run the test cases defined above when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import os
import unittest
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from paddle.incubate.hapi.metrics import *
from paddle.incubate.hapi.utils import to_list
def accuracy(pred, label, topk=(1, )):
    """Compute top-k accuracies with NumPy as a reference implementation.

    Args:
        pred: 2-D array of scores, one row per sample.
        label: array of shape ``(N, 1)`` with the true class indices.
        topk: iterable of ``k`` values to evaluate.

    Returns:
        list: for each ``k`` in ``topk``, the fraction of samples whose
        label is among the ``k`` highest-scoring classes.
    """
    maxk = max(topk)
    # Rank classes per row by descending score and keep the best `maxk`.
    ranked = np.argsort(pred)[:, ::-1]
    top_classes = ranked[:, :maxk]
    hits = top_classes == np.repeat(label, maxk, 1)

    batch_size = label.shape[0]
    return [hits[:, :k].sum() / batch_size for k in topk]
def convert_to_one_hot(y, C):
    """Turn class indices into noisy one-hot float32 score rows.

    Every entry starts as a random value in [0, 0.5); the entry at each
    row's class index is then set to 1 so it is the unique row maximum.

    Args:
        y: array with one class index per row, holding ``y.shape[0]``
            elements in total.
        C (int): number of classes (columns of the result).

    Returns:
        A float32 array of shape ``(y.shape[0], C)``.
    """
    num_rows = y.shape[0]
    oh = np.random.random((num_rows, C)).astype('float32') * .5
    # Vectorized assignment; avoids calling int() on 1-element arrays,
    # which NumPy has deprecated.
    class_idx = np.asarray(y).reshape(num_rows).astype(np.intp)
    oh[np.arange(num_rows), class_idx] = 1.
    return oh
class TestAccuracyDynamic(unittest.TestCase):
    """Compare the hapi ``Accuracy`` metric with a NumPy reference (dygraph)."""

    def setUp(self):
        """Configure top-1 accuracy over 5 classes and 1000 samples."""
        self.topk = (1, )
        self.class_num = 5
        self.sample_num = 1000
        self.name = None

    def random_pred_label(self):
        """Return ``(label, pred)`` with random labels and predictions.

        ``label`` is int64 of shape ``(sample_num, 1)``; ``pred`` is the
        float32 noisy one-hot encoding of independently drawn classes.
        """
        label = np.random.randint(0, self.class_num,
                                  (self.sample_num, 1)).astype('int64')
        pred = np.random.randint(0, self.class_num,
                                 (self.sample_num, 1)).astype('int32')
        pred_one_hot = convert_to_one_hot(pred, self.class_num)
        pred_one_hot = pred_one_hot.astype('float32')
        return label, pred_one_hot

    def test_main(self):
        """The metric must agree with ``accuracy`` and reset to zero."""
        with fluid.dygraph.guard(fluid.CPUPlace()):
            acc = Accuracy(topk=self.topk, name=self.name)
            for _ in range(10):
                label, pred = self.random_pred_label()
                label_var = to_variable(label)
                pred_var = to_variable(pred)
                state = to_list(acc.add_metric_op(pred_var, label_var))
                acc.update(* [s.numpy() for s in state])
                res_m = acc.accumulate()
                res_f = accuracy(pred, label, self.topk)
                # unittest assertions are used because bare `assert`
                # statements are stripped when Python runs with -O.
                self.assertTrue(
                    np.all(
                        np.isclose(
                            np.array(
                                res_m, dtype='float64'),
                            np.array(
                                res_f, dtype='float64'),
                            rtol=1e-3)),
                    "Accuracy precision error: {} != {}".format(res_m, res_f))
                acc.reset()
                self.assertEqual(np.sum(acc.total), 0)
                self.assertEqual(np.sum(acc.count), 0)
class TestAccuracyDynamicMultiTopk(TestAccuracyDynamic):
    """Dygraph accuracy test with top-1 and top-5 over 10 classes."""

    def setUp(self):
        """Override the configuration: two k values and a named metric."""
        self.name = "accuracy"
        self.sample_num = 1000
        self.class_num = 10
        self.topk = (1, 5)
class TestAccuracyStatic(TestAccuracyDynamic):
    """Compare the hapi ``Accuracy`` metric with a NumPy reference (static)."""

    def test_main(self):
        """The metric must agree with ``accuracy`` and reset to zero."""
        main_prog = fluid.Program()
        startup_prog = fluid.Program()
        with fluid.program_guard(main_prog, startup_prog):
            pred = fluid.data(
                name='pred', shape=[None, self.class_num], dtype='float32')
            label = fluid.data(name='label', shape=[None, 1], dtype='int64')
            acc = Accuracy(topk=self.topk, name=self.name)
            state = acc.add_metric_op(pred, label)

        exe = fluid.Executor(fluid.CPUPlace())
        compiled_main_prog = fluid.CompiledProgram(main_prog)
        fetch_names = [s.name for s in to_list(state)]

        for _ in range(10):
            # Distinct names keep the NumPy batches from shadowing the
            # graph variables `pred` and `label` declared above.
            label_np, pred_np = self.random_pred_label()
            state_ret = exe.run(compiled_main_prog,
                                feed={'pred': pred_np,
                                      'label': label_np},
                                fetch_list=fetch_names,
                                return_numpy=True)
            acc.update(*state_ret)
            res_m = acc.accumulate()
            res_f = accuracy(pred_np, label_np, self.topk)
            # unittest assertions are used because bare `assert`
            # statements are stripped when Python runs with -O.
            self.assertTrue(
                np.all(
                    np.isclose(
                        np.array(
                            res_m, dtype='float64'),
                        np.array(
                            res_f, dtype='float64'),
                        rtol=1e-3)),
                "Accuracy precision error: {} != {}".format(res_m, res_f))
            acc.reset()
            self.assertEqual(np.sum(acc.total), 0)
            self.assertEqual(np.sum(acc.count), 0)
class TestAccuracyStaticMultiTopk(TestAccuracyStatic):
    """Static-graph accuracy test with top-1 and top-5 over 10 classes."""

    def setUp(self):
        """Override the configuration: two k values and a named metric."""
        self.name = "accuracy"
        self.sample_num = 1000
        self.class_num = 10
        self.topk = (1, 5)
# Run the test cases defined above when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import unittest
import os
import numpy as np
import shutil
import tempfile
from paddle import fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
from paddle.fluid.dygraph.container import Sequential
from paddle.fluid.dygraph.base import to_variable
from paddle.incubate.hapi.model import Model, Input, set_device
from paddle.incubate.hapi.loss import CrossEntropy
from paddle.incubate.hapi.metrics import Accuracy
from paddle.incubate.hapi.datasets import MNIST
from paddle.incubate.hapi.vision.models import LeNet
from paddle.incubate.hapi.distributed import DistributedBatchSampler, prepare_distributed_context
class LeNetDygraph(fluid.dygraph.Layer):
    """Plain dygraph LeNet used as the reference model in these tests.

    Args:
        num_classes (int): output size of the last fully connected layer.
            If ``num_classes <= 0`` the classifier is not created and
            ``forward`` returns the convolutional features. Default: 10.
        classifier_activation (str): activation of the last fully connected
            layer. Default: 'softmax'.
    """

    def __init__(self, num_classes=10, classifier_activation='softmax'):
        super(LeNetDygraph, self).__init__()
        self.num_classes = num_classes
        self.features = Sequential(
            Conv2D(
                1, 6, 3, stride=1, padding=1),
            Pool2D(2, 'max', 2),
            Conv2D(
                6, 16, 5, stride=1, padding=0),
            Pool2D(2, 'max', 2))

        if num_classes > 0:
            # The last layer honours `num_classes` instead of a fixed 10.
            self.fc = Sequential(
                Linear(400, 120),
                Linear(120, 84),
                Linear(
                    84, num_classes, act=classifier_activation))

    def forward(self, inputs):
        """Run the feature extractor and, if present, the classifier."""
        x = self.features(inputs)

        if self.num_classes > 0:
            x = fluid.layers.flatten(x, 1)
            x = self.fc(x)
        return x
class MnistDataset(MNIST):
    """MNIST wrapper that can truncate the data and omit labels.

    Args:
        mode: forwarded to ``MNIST`` as ``mode``.
        return_label (bool): when False, ``__getitem__`` returns only the
            image in a 1-tuple.
        sample_num (int|None): when truthy, keep only the first
            ``sample_num`` images and labels.
    """

    def __init__(self, mode, return_label=True, sample_num=None):
        super(MnistDataset, self).__init__(mode=mode)
        self.return_label = return_label
        if not sample_num:
            return
        self.images = self.images[:sample_num]
        self.labels = self.labels[:sample_num]

    def __getitem__(self, idx):
        """Return ``(image, label)`` or ``(image,)`` for sample ``idx``."""
        raw_image = self.images[idx]
        raw_label = self.labels[idx]
        image = np.reshape(raw_image, [1, 28, 28])
        if not self.return_label:
            return (image, )
        return image, np.array(raw_label).astype('int64')

    def __len__(self):
        """Return the number of (possibly truncated) images."""
        return len(self.images)
def compute_acc(pred, label):
    """Return the fraction of rows whose argmax equals the label.

    ``pred`` is reduced with argmax over its last axis, turned into a
    column and compared (with broadcasting) against ``label``.
    """
    predicted = np.argmax(pred, -1)[:, np.newaxis]
    matches = predicted == np.array(label)
    num_rows = matches.shape[0]
    return np.sum(matches) / num_rows
def dynamic_train(model, dataloader):
    """Train ``model`` for one pass over ``dataloader`` with Adam.

    For every batch the cross-entropy loss is summed (not averaged),
    back-propagated and minimized, after which gradients are cleared.
    """
    adam = fluid.optimizer.Adam(
        learning_rate=0.001, parameter_list=model.parameters())
    model.train()
    for batch_inputs, batch_labels in dataloader:
        predictions = model(batch_inputs)
        per_sample_loss = fluid.layers.cross_entropy(predictions,
                                                     batch_labels)
        total_loss = fluid.layers.reduce_sum(per_sample_loss)
        total_loss.backward()
        adam.minimize(total_loss)
        model.clear_gradients()
def dynamic_evaluate(model, dataloader):
    """Return the top-1 accuracy of ``model`` over ``dataloader``.

    Runs under ``no_grad`` in eval mode; correct predictions are counted
    and divided by ``len(dataloader.dataset)``.
    """
    with fluid.dygraph.no_grad():
        model.eval()
        num_correct = 0
        for batch_inputs, batch_labels in dataloader:
            scores = model(batch_inputs).numpy()
            predicted = np.argmax(scores, -1)[:, np.newaxis]
            matches = predicted == batch_labels.numpy()
            num_correct += matches.astype('int').sum()

    return num_correct / len(dataloader.dataset)
@unittest.skipIf(not fluid.is_compiled_with_cuda(),
                 'CPU testing is not supported')
class TestModel(unittest.TestCase):
    """End-to-end fit / evaluate / predict tests of the hapi ``Model`` on GPU.

    ``setUpClass`` trains a plain dygraph LeNet as a reference, records its
    accuracy in ``cls.acc1`` and saves its weights; the tests compare the
    hapi ``LeNet`` against that reference in dygraph and static mode.
    """

    @classmethod
    def setUpClass(cls):
        """Train the reference model and prepare the shared fixtures."""
        if not fluid.is_compiled_with_cuda():
            # A classmethod has no `self`; raising SkipTest is the way to
            # skip from class-level setup.
            raise unittest.SkipTest(
                'module not tested when ONLY_CPU compling')

        cls.device = set_device('gpu')
        fluid.enable_dygraph(cls.device)
        try:
            sp_num = 1280
            cls.train_dataset = MnistDataset(mode='train', sample_num=sp_num)
            cls.val_dataset = MnistDataset(mode='test', sample_num=sp_num)
            cls.test_dataset = MnistDataset(
                mode='test', return_label=False, sample_num=sp_num)

            cls.train_loader = fluid.io.DataLoader(
                cls.train_dataset, places=cls.device, batch_size=64)
            cls.val_loader = fluid.io.DataLoader(
                cls.val_dataset, places=cls.device, batch_size=64)
            cls.test_loader = fluid.io.DataLoader(
                cls.test_dataset, places=cls.device, batch_size=64)

            seed = 333
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            dy_lenet = LeNetDygraph()
            cls.init_param = dy_lenet.state_dict()
            dynamic_train(dy_lenet, cls.train_loader)

            cls.acc1 = dynamic_evaluate(dy_lenet, cls.val_loader)

            cls.inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
            cls.labels = [Input([None, 1], 'int64', name='label')]

            cls.save_dir = tempfile.mkdtemp()
            cls.weight_path = os.path.join(cls.save_dir, 'lenet')
            fluid.dygraph.save_dygraph(dy_lenet.state_dict(),
                                       cls.weight_path)
        finally:
            # Always return to static mode, even if the setup fails.
            fluid.disable_dygraph()

    @classmethod
    def tearDownClass(cls):
        """Remove the directory holding the saved reference weights."""
        shutil.rmtree(cls.save_dir)

    def test_fit_dygraph(self):
        self.fit(True)

    def test_fit_static(self):
        self.fit(False)

    def test_evaluate_dygraph(self):
        self.evaluate(True)

    def test_evaluate_static(self):
        self.evaluate(False)

    def test_predict_dygraph(self):
        self.predict(True)

    def test_predict_static(self):
        self.predict(False)

    def test_prepare_context(self):
        prepare_distributed_context()

    def fit(self, dynamic):
        """Fit the hapi LeNet and compare its accuracy with the reference.

        Args:
            dynamic (bool): run in dygraph mode when True, static otherwise.
        """
        if dynamic:
            fluid.enable_dygraph(self.device)
        try:
            seed = 333
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            model = LeNet()
            optim_new = fluid.optimizer.Adam(
                learning_rate=0.001, parameter_list=model.parameters())
            model.prepare(
                optim_new,
                loss_function=CrossEntropy(average=False),
                metrics=Accuracy(),
                inputs=self.inputs,
                labels=self.labels)
            model.fit(self.train_dataset, batch_size=64, shuffle=False)

            result = model.evaluate(self.val_dataset, batch_size=64)
            np.testing.assert_allclose(result['acc'], self.acc1)

            # Fit again, this time from explicitly built data loaders.
            train_sampler = DistributedBatchSampler(
                self.train_dataset, batch_size=64, shuffle=False)
            val_sampler = DistributedBatchSampler(
                self.val_dataset, batch_size=64, shuffle=False)

            train_loader = fluid.io.DataLoader(
                self.train_dataset,
                batch_sampler=train_sampler,
                places=self.device,
                return_list=True)

            val_loader = fluid.io.DataLoader(
                self.val_dataset,
                batch_sampler=val_sampler,
                places=self.device,
                return_list=True)

            model.fit(train_loader, val_loader)
        finally:
            # Restore static mode even if an assertion above failed.
            if dynamic:
                fluid.disable_dygraph()

    def evaluate(self, dynamic):
        """Load the reference weights and check the evaluation accuracy.

        Args:
            dynamic (bool): run in dygraph mode when True, static otherwise.
        """
        if dynamic:
            fluid.enable_dygraph(self.device)
        try:
            model = LeNet()
            model.prepare(
                metrics=Accuracy(), inputs=self.inputs, labels=self.labels)
            model.load(self.weight_path)
            result = model.evaluate(self.val_dataset, batch_size=64)
            np.testing.assert_allclose(result['acc'], self.acc1)

            # Evaluate again from an explicitly built data loader.
            sampler = DistributedBatchSampler(
                self.val_dataset, batch_size=64, shuffle=False)

            val_loader = fluid.io.DataLoader(
                self.val_dataset,
                batch_sampler=sampler,
                places=self.device,
                return_list=True)

            model.evaluate(val_loader)
        finally:
            if dynamic:
                fluid.disable_dygraph()

    def predict(self, dynamic):
        """Load the reference weights and check the prediction accuracy.

        Args:
            dynamic (bool): run in dygraph mode when True, static otherwise.
        """
        if dynamic:
            fluid.enable_dygraph(self.device)
        try:
            model = LeNet()
            model.prepare(inputs=self.inputs)
            model.load(self.weight_path)
            output = model.predict(
                self.test_dataset, batch_size=64, stack_outputs=True)
            np.testing.assert_equal(output[0].shape[0],
                                    len(self.test_dataset))

            acc = compute_acc(output[0], self.val_dataset.labels)
            np.testing.assert_allclose(acc, self.acc1)

            sampler = DistributedBatchSampler(
                self.test_dataset, batch_size=64, shuffle=False)

            test_loader = fluid.io.DataLoader(
                self.test_dataset,
                batch_sampler=sampler,
                places=self.device,
                return_list=True)

            # NOTE(review): this calls evaluate() on a label-less loader;
            # predict() may have been intended - confirm before changing.
            model.evaluate(test_loader)
        finally:
            if dynamic:
                fluid.disable_dygraph()
class MyModel(Model):
    """Minimal hapi model: one 20 -> 10 linear layer with softmax."""

    def __init__(self):
        super(MyModel, self).__init__()
        self._fc = Linear(20, 10, act='softmax')

    def forward(self, x):
        """Apply the single fully connected layer to ``x``."""
        return self._fc(x)
class TestModelFunction(unittest.TestCase):
    """Unit tests for individual hapi ``Model`` methods, run on CPU."""

    def set_seed(self, seed=1024):
        """Seed the default startup and main programs."""
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed

    def _prepare_sgd_model(self):
        """Build a ``MyModel`` prepared with SGD and a summed CrossEntropy."""
        model = MyModel()
        inputs = [Input([None, 20], 'float32', name='x')]
        labels = [Input([None, 1], 'int64', name='label')]
        optim = fluid.optimizer.SGD(learning_rate=0.001,
                                    parameter_list=model.parameters())
        model.prepare(
            inputs=inputs,
            optimizer=optim,
            loss_function=CrossEntropy(average=False),
            labels=labels)
        return model

    def test_train_batch(self, dynamic=True):
        """``train_batch`` must give the same loss as a hand-written step.

        The ``dynamic`` parameter is kept for interface compatibility; both
        modes are always exercised.
        """
        dim = 20
        data = np.random.random(size=(4, dim)).astype(np.float32)
        label = np.random.randint(0, 10, size=(4, 1)).astype(np.int64)

        def get_expect():
            # Reference: one manual SGD step in dygraph mode.
            fluid.enable_dygraph(fluid.CPUPlace())
            try:
                self.set_seed()
                m = MyModel()
                optim = fluid.optimizer.SGD(learning_rate=0.001,
                                            parameter_list=m.parameters())
                m.train()
                output = m(to_variable(data))
                l = to_variable(label)
                loss = fluid.layers.cross_entropy(output, l)
                avg_loss = fluid.layers.reduce_sum(loss)
                avg_loss.backward()
                optim.minimize(avg_loss)
                m.clear_gradients()
            finally:
                fluid.disable_dygraph()
            return avg_loss.numpy()

        ref = get_expect()
        for is_dynamic in [True, False]:
            device = set_device('cpu')
            if is_dynamic:
                fluid.enable_dygraph(device)
            try:
                self.set_seed()
                model = MyModel()

                optim2 = fluid.optimizer.SGD(
                    learning_rate=0.001, parameter_list=model.parameters())

                inputs = [Input([None, dim], 'float32', name='x')]
                labels = [Input([None, 1], 'int64', name='label')]
                model.prepare(
                    optim2,
                    loss_function=CrossEntropy(average=False),
                    inputs=inputs,
                    labels=labels,
                    device=device)
                loss, = model.train_batch([data], [label])

                np.testing.assert_allclose(loss.flatten(), ref.flatten())
            finally:
                # Restore static mode even if the comparison failed.
                if is_dynamic:
                    fluid.disable_dygraph()

    def test_test_batch(self, dynamic=True):
        """``test_batch`` must match a direct dygraph forward pass.

        The ``dynamic`` parameter is kept for interface compatibility; both
        modes are always exercised.
        """
        dim = 20
        data = np.random.random(size=(4, dim)).astype(np.float32)

        def get_expect():
            fluid.enable_dygraph(fluid.CPUPlace())
            try:
                self.set_seed()
                m = MyModel()
                m.eval()
                output = m(to_variable(data))
            finally:
                fluid.disable_dygraph()
            return output.numpy()

        ref = get_expect()
        for is_dynamic in [True, False]:
            device = set_device('cpu')
            if is_dynamic:
                fluid.enable_dygraph(device)
            try:
                self.set_seed()
                model = MyModel()
                inputs = [Input([None, dim], 'float32', name='x')]
                model.prepare(inputs=inputs, device=device)
                out, = model.test_batch([data])

                np.testing.assert_allclose(out, ref)
            finally:
                if is_dynamic:
                    fluid.disable_dygraph()

    def test_save_load(self):
        """Save then load a prepared model in dygraph and static mode."""
        for is_dynamic in [True, False]:
            # A fresh directory per mode: nothing is deleted while a later
            # iteration still needs it, and cleanup happens on failure too.
            path = tempfile.mkdtemp()
            device = set_device('cpu')
            if is_dynamic:
                fluid.enable_dygraph(device)
            try:
                model = self._prepare_sgd_model()
                model.save(os.path.join(path, 'test'))
                model.load(os.path.join(path, 'test'))
            finally:
                shutil.rmtree(path)
                if is_dynamic:
                    fluid.disable_dygraph()

    def test_dynamic_save_static_load(self):
        """Weights saved in dygraph mode must load in static mode."""
        path = tempfile.mkdtemp()
        try:
            device = set_device('cpu')
            fluid.enable_dygraph(device)
            try:
                model = self._prepare_sgd_model()
                model.save(os.path.join(path, 'test'))
            finally:
                fluid.disable_dygraph()

            model = self._prepare_sgd_model()
            model.load(os.path.join(path, 'test'))
        finally:
            shutil.rmtree(path)

    def test_static_save_dynamic_load(self):
        """Weights saved in static mode must load in dygraph mode."""
        path = tempfile.mkdtemp()
        try:
            model = self._prepare_sgd_model()
            model.save(os.path.join(path, 'test'))

            device = set_device('cpu')
            fluid.enable_dygraph(device)
            try:
                model = self._prepare_sgd_model()
                model.load(os.path.join(path, 'test'))
            finally:
                fluid.disable_dygraph()
        finally:
            shutil.rmtree(path)

    def test_parameters(self):
        """``parameters()`` must expose the 20 x 10 weight first."""
        for is_dynamic in [True, False]:
            device = set_device('cpu')
            if is_dynamic:
                fluid.enable_dygraph(device)
            try:
                model = MyModel()
                inputs = [Input([None, 20], 'float32', name='x')]
                model.prepare(inputs=inputs)
                params = model.parameters()
                self.assertEqual(params[0].shape[0], 20)
                self.assertEqual(params[0].shape[1], 10)
            finally:
                if is_dynamic:
                    fluid.disable_dygraph()

    def test_export_deploy_model(self):
        """An exported inference model must reproduce ``test_batch`` output."""
        model = LeNet()
        inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
        model.prepare(inputs=inputs)
        # mkdtemp already creates the directory, so no existence check is
        # needed before saving into it.
        save_dir = tempfile.mkdtemp()
        try:
            tensor_img = np.array(
                np.random.random((1, 1, 28, 28)), dtype=np.float32)
            ori_results = model.test_batch(tensor_img)

            model.save_inference_model(save_dir)

            if fluid.is_compiled_with_cuda():
                place = fluid.CUDAPlace(0)
            else:
                place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            [inference_program, feed_target_names, fetch_targets] = (
                fluid.io.load_inference_model(
                    dirname=save_dir, executor=exe))

            results = exe.run(inference_program,
                              feed={feed_target_names[0]: tensor_img},
                              fetch_list=fetch_targets)

            np.testing.assert_allclose(results, ori_results)
        finally:
            shutil.rmtree(save_dir)
# Run the test cases defined above when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import unittest
import random
import time
from paddle.incubate.hapi.progressbar import ProgressBar
class TestProgressBar(unittest.TestCase):
    """Smoke tests driving ``ProgressBar`` through many update paths."""

    def prog_bar(self, num, epoch, width, verbose=1):
        """Drive a ``ProgressBar`` of ``num`` steps for ``epoch`` epochs.

        ``width`` is accepted but not used in this method. Nothing is
        asserted; the test passes if no call raises.
        """
        for epoch in range(epoch):
            progbar = ProgressBar(num, verbose=verbose)
            values = [
                ['loss', 50.341673],
                ['acc', 0.00256],
            ]
            for step in range(1, num + 1):
                values[0][1] -= random.random() * 0.1
                values[1][1] += random.random() * 0.1
                # Only report every 10th step.
                if step % 10 == 0:
                    progbar.update(step, values)
                time.sleep(0.002)
            progbar.update(step, values)

            # Feed different value types: int, str, small float and
            # one-element arrays.
            progbar.update(1, [['loss', int(1)]])
            progbar.update(1, [['loss', 'INF']])
            progbar.update(1, [['loss', 1e-4]])
            progbar.update(1, [['loss', np.array([1.])]])
            progbar.update(1, [['loss', np.array([1e-4])]])
            progbar.start()

            # Poke private attributes to reach further display branches.
            progbar.update(0, values)
            progbar._dynamic_display = False
            progbar.update(1e4, values)

            progbar._num = None
            progbar.update(0, values)

            progbar._num = 1
            progbar.update(1 + 1e-4, values)

    def test1(self):
        self.prog_bar(50, 1, 30)

    def test2(self):
        self.prog_bar(50, 2, 30)

    def test4(self):
        self.prog_bar(50, 2, 30, verbose=2)

    def test_errors(self):
        """Constructing a ``ProgressBar`` with -1 must raise TypeError."""
        with self.assertRaises(TypeError):
            ProgressBar(-1)
# Run the test cases defined above when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import inspect
import numpy as np
from paddle import fluid
from paddle.fluid.framework import Variable
from paddle.fluid.executor import global_scope
def to_list(value):
    """Normalize ``value`` to a list.

    ``None`` is returned unchanged, a list or tuple is copied into a new
    list, and anything else is wrapped in a one-element list.
    """
    if value is None:
        return None
    is_sequence = isinstance(value, (list, tuple))
    return list(value) if is_sequence else [value]
def to_numpy(var):
    """Return the value of ``var`` as a NumPy array.

    A ``VarBase`` is converted with its own ``numpy()`` method; any other
    ``Variable`` is looked up by name in the global scope.
    """
    assert isinstance(var, (Variable, fluid.core.VarBase)), "not a variable"
    if not isinstance(var, fluid.core.VarBase):
        tensor = global_scope().find_var(var.name).get_tensor()
        return np.array(tensor)
    return var.numpy()
def flatten_list(l):
    """Flatten a list of lists by one level.

    Returns:
        tuple: ``(flat, lengths)`` where ``flat`` concatenates the
        sub-lists and ``lengths`` holds the length of each sub-list, as
        needed by ``restore_flatten_list``.
    """
    assert isinstance(l, list), "not a list"
    flat, lengths = [], []
    for sub in l:
        assert isinstance(sub, list), "sub content not a list"
        lengths.append(len(sub))
        flat.extend(sub)
    return flat, lengths
def restore_flatten_list(l, splits):
    """Split ``l`` back into consecutive sub-lists of the given lengths.

    Inverse of ``flatten_list``: ``splits`` gives the length of each
    sub-list, taken from the front of ``l`` in order.
    """
    remaining = l
    restored = []
    for size in splits:
        assert len(remaining) >= size, "list length invalid"
        restored.append(remaining[:size])
        remaining = remaining[size:]
    return restored
def extract_args(func):
    """Return the positional argument names of ``func``.

    ``inspect.getfullargspec`` is used when available, otherwise the older
    ``inspect.getargspec``.
    """
    spec_fn = getattr(inspect, 'getfullargspec', None)
    if spec_fn is None:
        spec_fn = inspect.getargspec
    return spec_fn(func)[0]
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import models
from .models import *
__all__ = models.__all__
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from . import lenet
from .lenet import *
__all__ = lenet.__all__
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, BatchNorm, Pool2D, Linear
from paddle.fluid.dygraph.container import Sequential
from ...model import Model
__all__ = ['LeNet']
class LeNet(Model):
    """LeNet model.

    Reference: LeCun Y, Bottou L, Bengio Y, et al. "Gradient-based learning
    applied to document recognition". Proceedings of the IEEE, 1998,
    86(11): 2278-2324.

    Args:
        num_classes (int): output dim of the last fc layer. If
            num_classes <= 0, the fc layers are not defined and
            ``forward`` returns the convolutional features. Default: 10.
        classifier_activation (str): activation for the last fc layer.
            Default: 'softmax'.

    Examples:
        .. code-block:: python

            from paddle.incubate.hapi.vision.models import LeNet

            model = LeNet()
    """

    def __init__(self, num_classes=10, classifier_activation='softmax'):
        super(LeNet, self).__init__()
        self.num_classes = num_classes
        self.features = Sequential(
            Conv2D(
                1, 6, 3, stride=1, padding=1),
            Pool2D(2, 'max', 2),
            Conv2D(
                6, 16, 5, stride=1, padding=0),
            Pool2D(2, 'max', 2))

        if num_classes > 0:
            # The last layer honours `num_classes` instead of a fixed 10.
            self.fc = Sequential(
                Linear(400, 120),
                Linear(120, 84),
                Linear(
                    84, num_classes, act=classifier_activation))

    def forward(self, inputs):
        """Run the feature extractor and, if present, the classifier."""
        x = self.features(inputs)

        if self.num_classes > 0:
            x = fluid.layers.flatten(x, 1)
            x = self.fc(x)
        return x
......@@ -178,6 +178,10 @@ packages=['paddle',
'paddle.fluid.incubate.fleet.parameter_server.pslib',
'paddle.fluid.incubate.fleet.collective',
'paddle.fluid.incubate.fleet.utils',
'paddle.incubate.hapi',
'paddle.incubate.hapi.datasets',
'paddle.incubate.hapi.vision',
'paddle.incubate.hapi.vision.models',
'paddle.io',
'paddle.nn',
'paddle.nn.functional',
......
......@@ -102,7 +102,27 @@
"load_persistables_for_inference",
"cache",
"buffered",
"xmap_readers"
"xmap_readers",
"Metric.reset",
"Metric.update",
"Metric.accumulate",
"Metric.name",
"Metric.add_metric_op",
"Callback.set_params",
"Callback.on_train_begin",
"Callback.on_train_end",
"Callback.on_eval_begin",
"Callback.on_eval_end",
"Callback.on_test_begin",
"Callback.on_test_end",
"Callback.on_epoch_begin",
"Callback.on_epoch_end",
"Callback.on_train_batch_begin",
"Callback.on_train_batch_end",
"Callback.on_eval_batch_begin",
"Callback.on_eval_batch_end",
"Callback.on_test_batch_begin",
"Callback.on_test_batch_end"
],
"wlist_no_op_pass":[
"gelu",
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册