未验证 提交 43facfd3 编写于 作者: L LielinJiang 提交者: GitHub

[Cherry-pick]Add DistributedBatchSampler and Colerjitter (#25242)

* add DistributedSampler and ColorJitter, test=develop
上级 693083a4
...@@ -96,6 +96,7 @@ if (WITH_TESTING) ...@@ -96,6 +96,7 @@ if (WITH_TESTING)
add_subdirectory(paddle/fluid/tests) add_subdirectory(paddle/fluid/tests)
add_subdirectory(paddle/fluid/contrib/tests) add_subdirectory(paddle/fluid/contrib/tests)
add_subdirectory(paddle/fluid/contrib/slim/tests) add_subdirectory(paddle/fluid/contrib/slim/tests)
add_subdirectory(paddle/incubate/hapi/tests)
endif() endif()
install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR} install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR}
DESTINATION opt/paddle/share/wheels DESTINATION opt/paddle/share/wheels
......
...@@ -35,3 +35,6 @@ import paddle.distributed ...@@ -35,3 +35,6 @@ import paddle.distributed
batch = batch.batch batch = batch.batch
import paddle.sysconfig import paddle.sysconfig
import paddle.complex import paddle.complex
from . import incubate
from .incubate import hapi
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import hapi
__all__ = []
__all__ += hapi.__all__
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import distributed
from . import vision
__all__ = ['distributed', 'vision']
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import numpy as np
from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.fluid.io import BatchSampler
__all__ = ['DistributedBatchSampler']
class DistributedBatchSampler(BatchSampler):
"""Sampler that restricts data loading to a subset of the dataset.
In such case, each process can pass a DistributedBatchSampler instance
as a DataLoader sampler, and load a subset of the original dataset that
is exclusive to it.
.. note::
Dataset is assumed to be of constant size.
Args:
dataset(paddle.io.Dataset): this could be a `paddle.io.Dataset` implement
or other python object which implemented
`__len__` for BatchSampler to get sample
number of data source.
batch_size(int): sample indice number in a mini-batch indices.
shuffle(bool): whther to shuffle indices order before genrating
batch indices. Default False.
drop_last(bool): whether drop the last incomplete batch dataset size
is not divisible by the batch size. Default False
Examples:
.. code-block:: python
from paddle.incubate.hapi.distributed import DistributedBatchSampler
class FakeDataset():
def __init__(self):
pass
def __getitem__(self, idx):
return idx,
def __len__(self):
return 10
train_dataset = FakeDataset()
dist_train_dataloader = DistributedBatchSampler(train_dataset, batch_size=4)
for data in dist_train_dataloader:
# do something
break
"""
def __init__(self, dataset, batch_size, shuffle=False, drop_last=False):
self.dataset = dataset
assert isinstance(batch_size, int) and batch_size > 0, \
"batch_size should be a positive integer"
self.batch_size = batch_size
assert isinstance(shuffle, bool), \
"shuffle should be a boolean value"
self.shuffle = shuffle
assert isinstance(drop_last, bool), \
"drop_last should be a boolean number"
self.drop_last = drop_last
self.nranks = ParallelEnv().nranks
self.local_rank = ParallelEnv().local_rank
self.epoch = 0
self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.nranks))
self.total_size = self.num_samples * self.nranks
def __iter__(self):
num_samples = len(self.dataset)
indices = np.arange(num_samples).tolist()
indices += indices[:(self.total_size - len(indices))]
assert len(indices) == self.total_size
if self.shuffle:
np.random.RandomState(self.epoch).shuffle(indices)
self.epoch += 1
# subsample
def _get_indices_by_batch_size(indices):
subsampled_indices = []
last_batch_size = self.total_size % (self.batch_size * self.nranks)
assert last_batch_size % self.nranks == 0
last_local_batch_size = last_batch_size // self.nranks
for i in range(self.local_rank * self.batch_size,
len(indices) - last_batch_size,
self.batch_size * self.nranks):
subsampled_indices.extend(indices[i:i + self.batch_size])
indices = indices[len(indices) - last_batch_size:]
subsampled_indices.extend(indices[
self.local_rank * last_local_batch_size:(
self.local_rank + 1) * last_local_batch_size])
return subsampled_indices
if self.nranks > 1:
indices = _get_indices_by_batch_size(indices)
assert len(indices) == self.num_samples
_sample_iter = iter(indices)
batch_indices = []
for idx in _sample_iter:
batch_indices.append(idx)
if len(batch_indices) == self.batch_size:
yield batch_indices
batch_indices = []
if not self.drop_last and len(batch_indices) > 0:
yield batch_indices
def __len__(self):
num_samples = self.num_samples
num_samples += int(not self.drop_last) * (self.batch_size - 1)
return num_samples // self.batch_size
def set_epoch(self, epoch):
self.epoch = epoch
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import unittest
from paddle.incubate.hapi.distributed import DistributedBatchSampler
class FakeDataset():
def __init__(self):
pass
def __getitem__(self, index):
return index
def __len__(self):
return 10
class TestDistributedBatchSampler(unittest.TestCase):
def test_sampler(self):
dataset = FakeDataset()
sampler = DistributedBatchSampler(dataset, batch_size=1, shuffle=True)
for batch_idx in sampler:
batch_idx
pass
def test_multiple_gpus_sampler(self):
dataset = FakeDataset()
sampler1 = DistributedBatchSampler(
dataset, batch_size=4, shuffle=True, drop_last=True)
sampler2 = DistributedBatchSampler(
dataset, batch_size=4, shuffle=True, drop_last=True)
sampler1.nranks = 2
sampler1.local_rank = 0
sampler1.num_samples = int(
math.ceil(len(dataset) * 1.0 / sampler1.nranks))
sampler1.total_size = sampler1.num_samples * sampler1.nranks
sampler2.nranks = 2
sampler2.local_rank = 1
sampler2.num_samples = int(
math.ceil(len(dataset) * 1.0 / sampler2.nranks))
sampler2.total_size = sampler2.num_samples * sampler2.nranks
for batch_idx in sampler1:
batch_idx
pass
for batch_idx in sampler2:
batch_idx
pass
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from paddle.incubate.hapi.vision.transforms import transforms
class TestTransforms(unittest.TestCase):
def do_transform(self, trans):
fake_img = (np.random.random((400, 300, 3)) * 255).astype('uint8')
for t in trans:
fake_img = t(fake_img)
def test_color_jitter(self):
trans = [
transforms.BrightnessTransform(0.0), transforms.HueTransform(0.0),
transforms.SaturationTransform(0.0),
transforms.ContrastTransform(0.0),
transforms.ColorJitter(0.2, 0.2, 0.2, 0.2)
]
self.do_transform(trans)
def test_exception(self):
with self.assertRaises(ValueError):
transforms.ContrastTransform(-1.0)
with self.assertRaises(ValueError):
transforms.SaturationTransform(-1.0),
with self.assertRaises(ValueError):
transforms.HueTransform(-1.0)
with self.assertRaises(ValueError):
transforms.BrightnessTransform(-1.0)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import transforms
from .transforms import *
__all__ = transforms.__all__
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import transforms
from .transforms import *
__all__ = transforms.__all__
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
import sys
import cv2
import random
import numpy as np
import collections
if sys.version_info < (3, 3):
Sequence = collections.Sequence
Iterable = collections.Iterable
else:
Sequence = collections.abc.Sequence
Iterable = collections.abc.Iterable
__all__ = [
"BrightnessTransform",
"SaturationTransform",
"ContrastTransform",
"HueTransform",
"ColorJitter",
]
class BrightnessTransform(object):
"""Adjust brightness of the image.
Args:
value (float): How much to adjust the brightness. Can be any
non negative number. 0 gives the original image
Examples:
.. code-block:: python
import numpy as np
from paddle.incubate.hapi.vision.transforms import BrightnessTransform
transform = BrightnessTransform(0.4)
fake_img = np.random.rand(500, 500, 3).astype('float32')
fake_img = transform(fake_img)
print(fake_img.shape)
"""
def __init__(self, value):
if value < 0:
raise ValueError("brightness value should be non-negative")
self.value = value
def __call__(self, img):
if self.value == 0:
return img
dtype = img.dtype
img = img.astype(np.float32)
alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
img = img * alpha
return img.clip(0, 255).astype(dtype)
class ContrastTransform(object):
"""Adjust contrast of the image.
Args:
value (float): How much to adjust the contrast. Can be any
non negative number. 0 gives the original image
Examples:
.. code-block:: python
import numpy as np
from paddle.incubate.hapi.vision.transforms import ContrastTransform
transform = ContrastTransform(0.4)
fake_img = np.random.rand(500, 500, 3).astype('float32')
fake_img = transform(fake_img)
print(fake_img.shape)
"""
def __init__(self, value):
if value < 0:
raise ValueError("contrast value should be non-negative")
self.value = value
def __call__(self, img):
if self.value == 0:
return img
dtype = img.dtype
img = img.astype(np.float32)
alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
img = img * alpha + cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).mean() * (
1 - alpha)
return img.clip(0, 255).astype(dtype)
class SaturationTransform(object):
"""Adjust saturation of the image.
Args:
value (float): How much to adjust the saturation. Can be any
non negative number. 0 gives the original image
Examples:
.. code-block:: python
import numpy as np
from paddle.incubate.hapi.vision.transforms import SaturationTransform
transform = SaturationTransform(0.4)
fake_img = np.random.rand(500, 500, 3).astype('float32')
fake_img = transform(fake_img)
print(fake_img.shape)
"""
def __init__(self, value):
if value < 0:
raise ValueError("saturation value should be non-negative")
self.value = value
def __call__(self, img):
if self.value == 0:
return img
dtype = img.dtype
img = img.astype(np.float32)
alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray_img = gray_img[..., np.newaxis]
img = img * alpha + gray_img * (1 - alpha)
return img.clip(0, 255).astype(dtype)
class HueTransform(object):
"""Adjust hue of the image.
Args:
value (float): How much to adjust the hue. Can be any number
between 0 and 0.5, 0 gives the original image
Examples:
.. code-block:: python
import numpy as np
from paddle.incubate.hapi.vision.transforms import HueTransform
transform = HueTransform(0.4)
fake_img = np.random.rand(500, 500, 3).astype('float32')
fake_img = transform(fake_img)
print(fake_img.shape)
"""
def __init__(self, value):
if value < 0 or value > 0.5:
raise ValueError("hue value should be in [0.0, 0.5]")
self.value = value
def __call__(self, img):
if self.value == 0:
return img
dtype = img.dtype
img = img.astype(np.uint8)
hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV_FULL)
h, s, v = cv2.split(hsv_img)
alpha = np.random.uniform(-self.value, self.value)
h = h.astype(np.uint8)
# uint8 addition take cares of rotation across boundaries
with np.errstate(over="ignore"):
h += np.uint8(alpha * 255)
hsv_img = cv2.merge([h, s, v])
return cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR_FULL).astype(dtype)
class ColorJitter(object):
"""Randomly change the brightness, contrast, saturation and hue of an image.
Args:
brightness: How much to jitter brightness.
Chosen uniformly from [max(0, 1 - brightness), 1 + brightness]
or the given [min, max]. Should be non negative numbers.
contrast: How much to jitter contrast.
Chosen uniformly from [max(0, 1 - contrast), 1 + contrast]
or the given [min, max]. Should be non negative numbers.
saturation: How much to jitter saturation.
Chosen uniformly from [max(0, 1 - saturation), 1 + saturation]
or the given [min, max]. Should be non negative numbers.
hue: How much to jitter hue.
Chosen uniformly from [-hue, hue] or the given [min, max].
Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5.
Examples:
.. code-block:: python
import numpy as np
from paddle.incubate.hapi.vision.transforms import ColorJitter
transform = ColorJitter(0.4)
fake_img = np.random.rand(500, 500, 3).astype('float32')
fake_img = transform(fake_img)
print(fake_img.shape)
"""
def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
transforms = []
if brightness != 0:
transforms.append(BrightnessTransform(brightness))
if contrast != 0:
transforms.append(ContrastTransform(contrast))
if saturation != 0:
transforms.append(SaturationTransform(saturation))
if hue != 0:
transforms.append(HueTransform(hue))
random.shuffle(transforms)
self.transforms = transforms
def __call__(self, img):
for t in self.transforms:
img = t(img)
return img
...@@ -177,6 +177,10 @@ packages=['paddle', ...@@ -177,6 +177,10 @@ packages=['paddle',
'paddle.fluid.incubate.fleet.parameter_server.pslib', 'paddle.fluid.incubate.fleet.parameter_server.pslib',
'paddle.fluid.incubate.fleet.collective', 'paddle.fluid.incubate.fleet.collective',
'paddle.fluid.incubate.fleet.utils', 'paddle.fluid.incubate.fleet.utils',
'paddle.incubate',
'paddle.incubate.hapi',
'paddle.incubate.hapi.vision',
'paddle.incubate.hapi.vision.transforms',
] ]
with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f: with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册