diff --git a/bmn/BMN.png b/examples/bmn/BMN.png similarity index 100% rename from bmn/BMN.png rename to examples/bmn/BMN.png diff --git a/bmn/README.md b/examples/bmn/README.md similarity index 100% rename from bmn/README.md rename to examples/bmn/README.md diff --git a/bmn/bmn.yaml b/examples/bmn/bmn.yaml similarity index 100% rename from bmn/bmn.yaml rename to examples/bmn/bmn.yaml diff --git a/bmn/bmn_metric.py b/examples/bmn/bmn_metric.py similarity index 99% rename from bmn/bmn_metric.py rename to examples/bmn/bmn_metric.py index a19f87c6b42b8737ddeb52c3a330f59dcc932004..f9bf101f825913572803fbb1168260f83a0d96ac 100644 --- a/bmn/bmn_metric.py +++ b/examples/bmn/bmn_metric.py @@ -20,7 +20,7 @@ import json sys.path.append('../') -from metrics import Metric +from hapi.metrics import Metric from bmn_utils import boundary_choose, bmn_post_processing diff --git a/bmn/bmn_utils.py b/examples/bmn/bmn_utils.py similarity index 69% rename from bmn/bmn_utils.py rename to examples/bmn/bmn_utils.py index 06812e636fdaf6ccc419ca58151402ab50082112..cccf50647a55fabdfe94dd0f1f7e1370e15d0fe2 100644 --- a/bmn/bmn_utils.py +++ b/examples/bmn/bmn_utils.py @@ -162,56 +162,3 @@ def bmn_post_processing(video_dict, subset, output_path, result_path): outfile.close() -def _get_interp1d_bin_mask(seg_xmin, seg_xmax, tscale, num_sample, - num_sample_perbin): - """ generate sample mask for a boundary-matching pair """ - plen = float(seg_xmax - seg_xmin) - plen_sample = plen / (num_sample * num_sample_perbin - 1.0) - total_samples = [ - seg_xmin + plen_sample * ii - for ii in range(num_sample * num_sample_perbin) - ] - p_mask = [] - for idx in range(num_sample): - bin_samples = total_samples[idx * num_sample_perbin:(idx + 1) * - num_sample_perbin] - bin_vector = np.zeros([tscale]) - for sample in bin_samples: - sample_upper = math.ceil(sample) - sample_decimal, sample_down = math.modf(sample) - if int(sample_down) <= (tscale - 1) and int(sample_down) >= 0: - bin_vector[int(sample_down)] += 1 - sample_decimal - if int(sample_upper) <= (tscale - 1) and int(sample_upper) >= 0: - bin_vector[int(sample_upper)] += sample_decimal - bin_vector = 1.0 / num_sample_perbin * bin_vector - p_mask.append(bin_vector) - p_mask = np.stack(p_mask, axis=1) - return p_mask - - -def get_interp1d_mask(tscale, dscale, prop_boundary_ratio, num_sample, - num_sample_perbin): - """ generate sample mask for each point in Boundary-Matching Map """ - mask_mat = [] - for start_index in range(tscale): - mask_mat_vector = [] - for duration_index in range(dscale): - if start_index + duration_index < tscale: - p_xmin = start_index - p_xmax = start_index + duration_index - center_len = float(p_xmax - p_xmin) + 1 - sample_xmin = p_xmin - center_len * prop_boundary_ratio - sample_xmax = p_xmax + center_len * prop_boundary_ratio - p_mask = _get_interp1d_bin_mask(sample_xmin, sample_xmax, - tscale, num_sample, - num_sample_perbin) - else: - p_mask = np.zeros([tscale, num_sample]) - mask_mat_vector.append(p_mask) - mask_mat_vector = np.stack(mask_mat_vector, axis=2) - mask_mat.append(mask_mat_vector) - mask_mat = np.stack(mask_mat, axis=3) - mask_mat = mask_mat.astype(np.float32) - - sample_mask = np.reshape(mask_mat, [tscale, -1]) - return sample_mask diff --git a/bmn/config_utils.py b/examples/bmn/config_utils.py similarity index 100% rename from bmn/config_utils.py rename to examples/bmn/config_utils.py diff --git a/bmn/eval.py b/examples/bmn/eval.py similarity index 97% rename from bmn/eval.py rename to examples/bmn/eval.py index 
d25fc5c79d21fd55743def09445db5821e3e93af..ae6ef6d49e73d1d7c866be4d4346f2b13d262fab 100644 --- a/bmn/eval.py +++ b/examples/bmn/eval.py @@ -18,11 +18,9 @@ import sys import logging import paddle.fluid as fluid -sys.path.append('../') - -from model import set_device, Input +from hapi.model import set_device, Input +from hapi.vision.models import BMN, BmnLoss from bmn_metric import BmnMetric -from bmn_model import BMN, BmnLoss from reader import BmnDataset from config_utils import * diff --git a/bmn/eval_anet_prop.py b/examples/bmn/eval_anet_prop.py similarity index 100% rename from bmn/eval_anet_prop.py rename to examples/bmn/eval_anet_prop.py diff --git a/bmn/infer.list b/examples/bmn/infer.list similarity index 100% rename from bmn/infer.list rename to examples/bmn/infer.list diff --git a/bmn/predict.py b/examples/bmn/predict.py similarity index 97% rename from bmn/predict.py rename to examples/bmn/predict.py index e52927b60562425a1f03cfea12ab6cb21e76b3ef..2fce373b87645e933f5e434128346e9d8898fc2d 100644 --- a/bmn/predict.py +++ b/examples/bmn/predict.py @@ -18,11 +18,9 @@ import os import logging import paddle.fluid as fluid -sys.path.append('../') - -from model import set_device, Input +from hapi.model import set_device, Input +from hapi.vision.models import BMN, BmnLoss from bmn_metric import BmnMetric -from bmn_model import BMN, BmnLoss from reader import BmnDataset from config_utils import * diff --git a/bmn/reader.py b/examples/bmn/reader.py similarity index 100% rename from bmn/reader.py rename to examples/bmn/reader.py diff --git a/bmn/run.sh b/examples/bmn/run.sh similarity index 100% rename from bmn/run.sh rename to examples/bmn/run.sh diff --git a/bmn/train.py b/examples/bmn/train.py similarity index 98% rename from bmn/train.py rename to examples/bmn/train.py index fe46f6a607c6ab8f93be45ffeee11478ef862eb6..bca44177ab7d27eef57660a00eed2218c1422aaa 100644 --- a/bmn/train.py +++ b/examples/bmn/train.py @@ -18,10 +18,8 @@ import logging import sys import os -sys.path.append('../') - -from model import set_device, Input -from bmn_model import BMN, BmnLoss +from hapi.model import set_device, Input +from hapi.vision.models import BMN, BmnLoss from reader import BmnDataset from config_utils import * diff --git a/image_classification/README.MD b/examples/image_classification/README.MD similarity index 100% rename from image_classification/README.MD rename to examples/image_classification/README.MD diff --git a/image_classification/imagenet_dataset.py b/examples/image_classification/imagenet_dataset.py similarity index 90% rename from image_classification/imagenet_dataset.py rename to examples/image_classification/imagenet_dataset.py index 158093b3aa9380547490ac5da2386695dd71dd33..6572df01440a36c21330cc905da045e03ff79700 100644 --- a/image_classification/imagenet_dataset.py +++ b/examples/image_classification/imagenet_dataset.py @@ -18,8 +18,8 @@ import math import random import numpy as np -from datasets.folder import DatasetFolder -from transform import transforms +from hapi.datasets import DatasetFolder +from hapi.vision.transforms import transforms from paddle import fluid @@ -45,7 +45,8 @@ class ImageNetDataset(DatasetFolder): def __getitem__(self, idx): img_path, label = self.samples[idx] img = cv2.imread(img_path).astype(np.float32) - return self.transform(img), [label] + label = np.array([label]) + return self.transform(img, label) def __len__(self): return len(self.samples) diff --git a/image_classification/main.py b/examples/image_classification/main.py similarity index 96% 
rename from image_classification/main.py rename to examples/image_classification/main.py index bd986f61b8389f15500fa109e2086f2d054892b1..546991528631909d5f75caec4df96c63053e7fdb 100644 --- a/image_classification/main.py +++ b/examples/image_classification/main.py @@ -24,16 +24,18 @@ sys.path.append('../') import time import math import numpy as np -import models -import paddle.fluid as fluid -from model import CrossEntropy, Input, set_device -from imagenet_dataset import ImageNetDataset -from distributed import DistributedBatchSampler +import paddle.fluid as fluid from paddle.fluid.dygraph.parallel import ParallelEnv -from metrics import Accuracy from paddle.io import BatchSampler, DataLoader +from hapi.model import CrossEntropy, Input, set_device +from hapi.distributed import DistributedBatchSampler +from hapi.metrics import Accuracy +import hapi.vision.models as models + +from imagenet_dataset import ImageNetDataset + def make_optimizer(step_per_epoch, parameter_list=None): base_lr = FLAGS.lr diff --git a/tsm/README.md b/examples/tsm/README.md similarity index 100% rename from tsm/README.md rename to examples/tsm/README.md diff --git a/tsm/check.py b/examples/tsm/check.py similarity index 100% rename from tsm/check.py rename to examples/tsm/check.py diff --git a/tsm/dataset/README.md b/examples/tsm/dataset/README.md similarity index 100% rename from tsm/dataset/README.md rename to examples/tsm/dataset/README.md diff --git a/tsm/dataset/kinetics/generate_label.py b/examples/tsm/dataset/kinetics/generate_label.py similarity index 100% rename from tsm/dataset/kinetics/generate_label.py rename to examples/tsm/dataset/kinetics/generate_label.py diff --git a/tsm/dataset/kinetics/video2pkl.py b/examples/tsm/dataset/kinetics/video2pkl.py similarity index 100% rename from tsm/dataset/kinetics/video2pkl.py rename to examples/tsm/dataset/kinetics/video2pkl.py diff --git a/tsm/images/temporal_shift.png b/examples/tsm/images/temporal_shift.png similarity index 100% rename from tsm/images/temporal_shift.png rename to examples/tsm/images/temporal_shift.png diff --git a/tsm/infer.py b/examples/tsm/infer.py similarity index 97% rename from tsm/infer.py rename to examples/tsm/infer.py index 78dbe2cc6ab92dc2a85fee8f186b1b1ae8d74fdd..3de1c8438fe3f35be3a527950e0fa65705defe77 100644 --- a/tsm/infer.py +++ b/examples/tsm/infer.py @@ -19,8 +19,8 @@ import os import argparse import numpy as np -from model import Input, set_device -from models import tsm_resnet50 +from hapi.model import Input, set_device +from hapi.vision.models import tsm_resnet50 from check import check_gpu, check_version from kinetics_dataset import KineticsDataset diff --git a/tsm/kinetics_dataset.py b/examples/tsm/kinetics_dataset.py similarity index 100% rename from tsm/kinetics_dataset.py rename to examples/tsm/kinetics_dataset.py diff --git a/tsm/main.py b/examples/tsm/main.py similarity index 97% rename from tsm/main.py rename to examples/tsm/main.py index 07868dbdc43565341b19ef6fe69c693f812c6258..24b37938e82d999bfd046913d0f711bf74650cc3 100644 --- a/tsm/main.py +++ b/examples/tsm/main.py @@ -22,9 +22,9 @@ import numpy as np from paddle import fluid from paddle.fluid.dygraph.parallel import ParallelEnv -from model import Model, CrossEntropy, Input, set_device -from metrics import Accuracy -from models import tsm_resnet50 +from hapi.model import Model, CrossEntropy, Input, set_device +from hapi.metrics import Accuracy +from hapi.vision.models import tsm_resnet50 from check import check_gpu, check_version from kinetics_dataset import 
KineticsDataset diff --git a/tsm/transforms.py b/examples/tsm/transforms.py similarity index 100% rename from tsm/transforms.py rename to examples/tsm/transforms.py diff --git a/yolov3/README.md b/examples/yolov3/README.md similarity index 100% rename from yolov3/README.md rename to examples/yolov3/README.md diff --git a/yolov3/coco_metric.py b/examples/yolov3/coco_metric.py similarity index 100% rename from yolov3/coco_metric.py rename to examples/yolov3/coco_metric.py diff --git a/yolov3/dataset/download_voc.py b/examples/yolov3/dataset/download_voc.py similarity index 100% rename from yolov3/dataset/download_voc.py rename to examples/yolov3/dataset/download_voc.py diff --git a/yolov3/image/YOLOv3.jpg b/examples/yolov3/image/YOLOv3.jpg similarity index 100% rename from yolov3/image/YOLOv3.jpg rename to examples/yolov3/image/YOLOv3.jpg diff --git a/yolov3/image/YOLOv3_structure.jpg b/examples/yolov3/image/YOLOv3_structure.jpg similarity index 100% rename from yolov3/image/YOLOv3_structure.jpg rename to examples/yolov3/image/YOLOv3_structure.jpg diff --git a/yolov3/image/dog.jpg b/examples/yolov3/image/dog.jpg similarity index 100% rename from yolov3/image/dog.jpg rename to examples/yolov3/image/dog.jpg diff --git a/yolov3/infer.py b/examples/yolov3/infer.py similarity index 90% rename from yolov3/infer.py rename to examples/yolov3/infer.py index 21392f0acf1644e043f22f982970ec2379080fc2..d166874383a9b4dc733bd736af1f23da3eaefeda 100644 --- a/yolov3/infer.py +++ b/examples/yolov3/infer.py @@ -24,11 +24,11 @@ from paddle import fluid from paddle.fluid.optimizer import Momentum from paddle.io import DataLoader -from model import Model, Input, set_device -from models import yolov3_darknet53, YoloLoss +from hapi.model import Model, Input, set_device +from hapi.vision.models import yolov3_darknet53, YoloLoss +from hapi.vision.transforms import * from coco import COCODataset -from transforms import * from visualizer import draw_bbox import logging @@ -65,7 +65,8 @@ def main(): device = set_device(FLAGS.device) fluid.enable_dygraph(device) if FLAGS.dynamic else None - inputs = [Input([None, 3], 'int32', name='img_info'), + inputs = [Input([None, 1], 'int64', name='img_id'), + Input([None, 2], 'int32', name='img_shape'), Input([None, 3, None, None], 'float32', name='image')] cat2name = load_labels(FLAGS.label_list, with_background=False) @@ -87,9 +88,10 @@ def main(): img -= np.array(IMAGE_MEAN) img /= np.array(IMAGE_STD) img = img.transpose((2, 0, 1))[np.newaxis, :] - img_info = np.array([0, h, w]).astype('int32')[np.newaxis, :] + img_id = np.array([0]).astype('int64')[np.newaxis, :] + img_shape = np.array([h, w]).astype('int32')[np.newaxis, :] - _, bboxes = model.test([img_info, img]) + _, bboxes = model.test([img_id, img_shape, img]) vis_img = draw_bbox(orig_img, cat2name, bboxes, FLAGS.draw_threshold) save_name = get_save_image_name(FLAGS.output_dir, FLAGS.infer_image) diff --git a/yolov3/main.py b/examples/yolov3/main.py similarity index 97% rename from yolov3/main.py rename to examples/yolov3/main.py index 6709f8a0ec70c2b8e37730313e4f386a58192344..ebe85543712d82267eaded26da4c1db8800b735f 100644 --- a/yolov3/main.py +++ b/examples/yolov3/main.py @@ -25,13 +25,13 @@ from paddle import fluid from paddle.fluid.optimizer import Momentum from paddle.io import DataLoader -from model import Model, Input, set_device -from distributed import DistributedBatchSampler -from models import yolov3_darknet53, YoloLoss +from hapi.model import Model, Input, set_device +from hapi.distributed import 
DistributedBatchSampler +from hapi.datasets import COCODataset +from hapi.vision.transforms import * +from hapi.vision.models import yolov3_darknet53, YoloLoss from coco_metric import COCOMetric -from vision.datasets import COCODataset -from vision.transforms import * NUM_MAX_BOXES = 50 diff --git a/yolov3/visualizer.py b/examples/yolov3/visualizer.py similarity index 100% rename from yolov3/visualizer.py rename to examples/yolov3/visualizer.py diff --git a/hapi/callbacks.py b/hapi/callbacks.py index 4e76bf7209d82234f5dc6954002911a07ca55d0f..66690cf288efe8ba0d8dcc9eec64031674c8a18b 100644 --- a/hapi/callbacks.py +++ b/hapi/callbacks.py @@ -15,7 +15,7 @@ import six import copy -from hapi.progressbar import ProgressBar +from progressbar import ProgressBar from paddle.fluid.dygraph.parallel import ParallelEnv diff --git a/vision/datasets/__init__.py b/hapi/datasets/__init__.py similarity index 100% rename from vision/datasets/__init__.py rename to hapi/datasets/__init__.py diff --git a/vision/datasets/coco.py b/hapi/datasets/coco.py similarity index 100% rename from vision/datasets/coco.py rename to hapi/datasets/coco.py diff --git a/vision/datasets/flowers.py b/hapi/datasets/flowers.py similarity index 95% rename from vision/datasets/flowers.py rename to hapi/datasets/flowers.py index a261c1cb9d9dd878e97eb74d24ceac4e5519b646..1f4f707888d460260d598826ba15ca3c69455f7b 100644 --- a/vision/datasets/flowers.py +++ b/hapi/datasets/flowers.py @@ -75,7 +75,6 @@ class Flowers(Dataset): setid_file=None, mode='train', transform=None, - target_transform=None, download=True): assert mode.lower() in ['train', 'valid', 'test'], \ "mode should be 'train', 'valid' or 'test', but got {}".format(mode) @@ -100,7 +99,6 @@ class Flowers(Dataset): setid_file, SETID_URL, SETID_MD5, 'flowers', download) self.transform = transform - self.target_transform = target_transform # read dataset into memory self._load_anno() @@ -123,9 +121,7 @@ class Flowers(Dataset): image = np.array(Image.open(io.BytesIO(image))) if self.transform is not None: - image = self.transform(image) - if self.target_transform is not None: - label = self.target_transform(label) + image, label = self.transform(image, label) return image, label diff --git a/vision/datasets/folder.py b/hapi/datasets/folder.py similarity index 93% rename from vision/datasets/folder.py rename to hapi/datasets/folder.py index 91bb5269f52b2bdd8c015bcee895dfc7782a6aad..5c728a63f8d8b0bf313d94a3d5e5c605686d6451 100644 --- a/vision/datasets/folder.py +++ b/hapi/datasets/folder.py @@ -78,8 +78,6 @@ class DatasetFolder(Dataset): both extensions and is_valid_file should not be passed. transform (callable|optional): A function/transform that takes in a sample and returns a transformed version. - target_transform (callable|optional): A function/transform that takes - in the target and transforms it. is_valid_file (callable|optional): A function that takes path of a file and check if the file is a valid file (used to check of corrupt files) both extensions and is_valid_file should not be passed. 
@@ -96,11 +94,9 @@ class DatasetFolder(Dataset): loader=None, extensions=None, transform=None, - target_transform=None, is_valid_file=None): self.root = root self.transform = transform - self.target_transform = target_transform if extensions is None: extensions = IMG_EXTENSIONS classes, class_to_idx = self._find_classes(self.root) @@ -154,9 +150,7 @@ class DatasetFolder(Dataset): path, target = self.samples[index] sample = self.loader(path) if self.transform is not None: - sample = self.transform(sample) - if self.target_transform is not None: - target = self.target_transform(target) + sample, target = self.transform(sample, target) return sample, target diff --git a/vision/datasets/mnist.py b/hapi/datasets/mnist.py similarity index 96% rename from vision/datasets/mnist.py rename to hapi/datasets/mnist.py index 0e55b06a724bcd525e330fed3ca8b9f7240bfd00..18c62901edb95fd573334a4f3fe2201be7447711 100644 --- a/vision/datasets/mnist.py +++ b/hapi/datasets/mnist.py @@ -72,7 +72,6 @@ class MNIST(Dataset): label_path=None, mode='train', transform=None, - target_transform=None, download=True): assert mode.lower() in ['train', 'test'], \ "mode should be 'train' or 'test', but got {}".format(mode) @@ -95,7 +94,6 @@ label_path, label_url, label_md5, 'mnist', download) self.transform = transform - self.target_transform = target_transform # read dataset into memory self._parse_dataset() @@ -151,9 +149,7 @@ def __getitem__(self, idx): image, label = self.images[idx], self.labels[idx] if self.transform is not None: - image = self.transform(image) - if self.target_transform is not None: - label = self.target_transform(label) + image, label = self.transform(image, label) return image, label def __len__(self): diff --git a/vision/datasets/utils.py b/hapi/datasets/utils.py similarity index 100% rename from vision/datasets/utils.py rename to hapi/datasets/utils.py diff --git a/hapi/distributed.py b/hapi/distributed.py index 87818545671c45cf4faba234406e87762e897784..39bf9a35e79792a1f0c9dd23d296730fdc31daf5 100644 --- a/hapi/distributed.py +++ b/hapi/distributed.py @@ -23,7 +23,7 @@ import numpy as np from paddle import fluid from paddle.fluid.layers import collective from paddle.fluid.dygraph.parallel import ParallelEnv, ParallelStrategy -from paddle.fluid.io import BatchSampler +from paddle.io import BatchSampler _parallel_context_initialized = False @@ -39,7 +39,7 @@ class DistributedBatchSampler(BatchSampler): Dataset is assumed to be of constant size. Args: - data_source: this could be a `fluid.io.Dataset` implement + data_source: this could be a `paddle.io.Dataset` implementation or other python object which implements `__len__` for BatchSampler to get the sample number of the data source.
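The dataset changes above (Flowers, DatasetFolder, MNIST) replace the separate `target_transform` hook with a single transform that is called as `transform(sample, label)` and must return both. A minimal sketch of a transform written for this joint convention; the function name and normalization are illustrative, not part of the patch:

```python
import numpy as np

def scale_and_wrap(image, label):
    # hypothetical joint transform: receives the sample and its label,
    # returns both, matching `image, label = self.transform(image, label)`
    image = image.astype('float32') / 255.0  # normalize the image
    label = np.array([label])                # wrap the label, as ImageNetDataset does
    return image, label

# e.g. dataset = MNIST(mode='train', transform=scale_and_wrap)
```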
diff --git a/hapi/download.py b/hapi/download.py new file mode 100644 index 0000000000000000000000000000000000000000..10d3fba390647c494448b83295901a8973d2aba8 --- /dev/null +++ b/hapi/download.py @@ -0,0 +1,147 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import os.path as osp +import shutil +import requests +import tqdm +import hashlib +import time + +from paddle.fluid.dygraph.parallel import ParallelEnv + +import logging +logger = logging.getLogger(__name__) + +__all__ = ['get_weights_path'] + +WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/hapi/weights") + +DOWNLOAD_RETRY_LIMIT = 3 + + +def get_weights_path(url, md5sum=None): + """Get the weights path from WEIGHTS_HOME; if it does not exist, + download it from url. + """ + path, _ = get_path(url, WEIGHTS_HOME, md5sum) + return path + + +def map_path(url, root_dir): + # parse path after download under root_dir + fname = osp.split(url)[-1] + fpath = fname + return osp.join(root_dir, fpath) + + +def get_path(url, root_dir, md5sum=None, check_exist=True): + """ Download from the given url to root_dir. + If the file or directory specified by url exists under + root_dir, return the path directly; otherwise download it + from url and decompress it, then return the path. + + url (str): download url + root_dir (str): root dir for downloading, it should be + WEIGHTS_HOME or DATASET_HOME + md5sum (str): md5 sum of the download package + """ + # parse path after download to decompress under root_dir + fullpath = map_path(url, root_dir) + + exist_flag = False + if osp.exists(fullpath) and check_exist and _md5check(fullpath, md5sum): + exist_flag = True + if ParallelEnv().local_rank == 0: + logger.info("Found {}".format(fullpath)) + else: + if ParallelEnv().local_rank == 0: + fullpath = _download(url, root_dir, md5sum) + else: + while not os.path.exists(fullpath): + time.sleep(1) + return fullpath, exist_flag
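For reference, a sketch of how the model zoo below consumes `get_weights_path`; the URL and md5 are taken from the `model_urls` table in resnet.py, and callers strip the `.pdparams` suffix before `model.load`:

```python
from hapi.download import get_weights_path

# downloads to ~/.cache/paddle/hapi/weights on first use, then hits the cache
weight_path = get_weights_path(
    'https://paddle-hapi.bj.bcebos.com/models/resnet50.pdparams',
    md5sum='0884c9087266496c41c60d14a96f8530')
# model.load() expects the file prefix, so the suffix is stripped:
# model.load(weight_path[:-9])
```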
" + "Retry limit reached".format(url)) + if ParallelEnv().local_rank == 0: + logger.info("Downloading {} from {}".format(fname, url)) + + req = requests.get(url, stream=True) + if req.status_code != 200: + raise RuntimeError("Downloading from {} failed with code " + "{}!".format(url, req.status_code)) + + # For protecting download interupted, download to + # tmp_fullname firstly, move tmp_fullname to fullname + # after download finished + tmp_fullname = fullname + "_tmp" + total_size = req.headers.get('content-length') + with open(tmp_fullname, 'wb') as f: + if total_size: + for chunk in tqdm.tqdm( + req.iter_content(chunk_size=1024), + total=(int(total_size) + 1023) // 1024, + unit='KB'): + f.write(chunk) + else: + for chunk in req.iter_content(chunk_size=1024): + if chunk: + f.write(chunk) + shutil.move(tmp_fullname, fullname) + + return fullname + + +def _md5check(fullname, md5sum=None): + if md5sum is None: + return True + if ParallelEnv().local_rank == 0: + logger.info("File {} md5 checking...".format(fullname)) + md5 = hashlib.md5() + with open(fullname, 'rb') as f: + for chunk in iter(lambda: f.read(4096), b""): + md5.update(chunk) + calc_md5sum = md5.hexdigest() + + if calc_md5sum != md5sum: + if ParallelEnv().local_rank == 0: + logger.info("File {} md5 check failed, {}(calc) != " + "{}(base)".format(fullname, calc_md5sum, md5sum)) + return False + return True diff --git a/hapi/model.py b/hapi/model.py index d9451084bdc5e81447eb9b2eb5fc9dbf2cadcabe..3255e614fd80529cdd7ac17ca31604c6815a11c4 100644 --- a/hapi/model.py +++ b/hapi/model.py @@ -32,7 +32,7 @@ from paddle.fluid.dygraph.parallel import ParallelEnv from paddle.fluid.layers.utils import flatten from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy from paddle.fluid.incubate.fleet.base import role_maker -from paddle.fluid.io import DataLoader, Dataset +from paddle.io import DataLoader, Dataset from hapi.distributed import DistributedBatchSampler, _all_gather, prepare_distributed_context, _parallel_context_initialized from hapi.metrics import Metric @@ -45,6 +45,14 @@ __all__ = [ def set_device(device): + """ + Args: + device (str): specify device type, 'cpu' or 'gpu'. + + Returns: + fluid.CUDAPlace or fluid.CPUPlace: Created GPU or CPU place. 
+ """ + assert isinstance(device, six.string_types) and device.lower() in ['cpu', 'gpu'], \ "Expected device in ['cpu', 'gpu'], but got {}".format(device) @@ -117,9 +125,9 @@ class Loss(object): def forward(self, outputs, labels): raise NotImplementedError() - def __call__(self, outputs, labels): + def __call__(self, outputs, labels=None): labels = to_list(labels) - if in_dygraph_mode(): + if in_dygraph_mode() and labels: labels = [to_variable(l) for l in labels] losses = to_list(self.forward(to_list(outputs), labels)) if self.average: @@ -366,10 +374,27 @@ class StaticGraphAdapter(object): metric_list, metric_splits = flatten_list(endpoints['metric']) fetch_list = endpoints['loss'] + metric_list num_loss = len(endpoints['loss']) + + # if fetch Variable is same as input Variable, do not fetch + # from program, get it from input directly + pruned_fetch_list = [] + pruned_fetch_idx_name_map = [""] * len(fetch_list) + for i, fetch_var in enumerate(fetch_list): + if fetch_var.name in feed.keys(): + pruned_fetch_idx_name_map[i] = fetch_var.name + else: + pruned_fetch_list.append(fetch_var) + rets = self._executor.run(compiled_prog, feed=feed, - fetch_list=fetch_list, + fetch_list=pruned_fetch_list, return_numpy=False) + + # restore pruned fetch_list Variable from feeds + for i, name in enumerate(pruned_fetch_idx_name_map): + if len(name) > 0: + rets.insert(i, feed[name]) + # LoDTensor cannot be fetch as numpy directly rets = [np.array(v) for v in rets] if self.mode == 'test': @@ -867,8 +892,6 @@ class Model(fluid.dygraph.Layer): if not isinstance(inputs, (list, dict, Input)): raise TypeError( "'inputs' must be list or dict in static graph mode") - if loss_function and not isinstance(labels, (list, Input)): - raise TypeError("'labels' must be list in static graph mode") metrics = metrics or [] for metric in to_list(metrics): @@ -904,11 +927,11 @@ class Model(fluid.dygraph.Layer): FIXME: add more comments and usage Args: train_data (Dataset|DataLoader): An iterable data loader is used for - train. An instance of paddle.fluid.io.Dataset or - paddle.fluid.io.Dataloader is recomended. + train. An instance of paddle paddle.io.Dataset or + paddle.io.Dataloader is recomended. eval_data (Dataset|DataLoader): An iterable data loader is used for evaluation at the end of epoch. If None, will not do evaluation. - An instance of paddle.fluid.io.Dataset or paddle.fluid.io.Dataloader + An instance of paddle.io.Dataset or paddle.io.Dataloader is recomended. batch_size (int): Integer number. The batch size of train_data and eval_data. When train_data and eval_data are both the instance of Dataloader, this @@ -1032,8 +1055,8 @@ class Model(fluid.dygraph.Layer): FIXME: add more comments and usage Args: eval_data (Dataset|DataLoader): An iterable data loader is used for - evaluation. An instance of paddle.fluid.io.Dataset or - paddle.fluid.io.Dataloader is recomended. + evaluation. An instance of paddle.io.Dataset or + paddle.io.Dataloader is recomended. batch_size (int): Integer number. The batch size of train_data and eval_data. When train_data and eval_data are both the instance of Dataloader, this parameter will be ignored. @@ -1098,12 +1121,16 @@ class Model(fluid.dygraph.Layer): return eval_result - def predict(self, test_data, batch_size=1, num_workers=0): + def predict(self, + test_data, + batch_size=1, + num_workers=0, + stack_outputs=True): """ FIXME: add more comments and usage Args: test_data (Dataset|DataLoader): An iterable data loader is used for - predict. 
- def predict(self, test_data, batch_size=1, num_workers=0): + def predict(self, + test_data, + batch_size=1, + num_workers=0, + stack_outputs=True): """ FIXME: add more comments and usage Args: test_data (Dataset|DataLoader): An iterable data loader is used for - predict. An instance of paddle.fluid.io.Dataset or paddle.fluid.io.Dataloader + predict. An instance of paddle.io.Dataset or paddle.io.DataLoader is recommended. batch_size (int): Integer number. The batch size of train_data and eval_data. When train_data and eval_data are both the instance of Dataloader, this @@ -1111,6 +1138,12 @@ num_workers (int): the number of subprocess to load data, 0 for no subprocess used and loading data in main process. When train_data and eval_data are both the instance of Dataloader, this parameter will be ignored. + stack_outputs (bool): whether to stack output fields like a batch. If an output + field of a sample is in shape [X, Y] and test_data contains N samples, the + predict output field will be in shape [N, X, Y] if stack_outputs is True, and + will be a length-N list in shape [[X, Y], [X, Y], ..., [X, Y]] if stack_outputs + is False. stack_outputs=False is intended for LoDTensor output situations; + setting it to True is recommended if the outputs contain no LoDTensor. Default: True """ if fluid.in_dygraph_mode(): @@ -1137,19 +1170,16 @@ if not isinstance(test_loader, Iterable): loader = test_loader() - outputs = None + outputs = [] for data in tqdm.tqdm(loader): - if not fluid.in_dygraph_mode(): - data = data[0] - - outs = self.test(*data) + data = flatten(data) + outputs.append(self.test(data[:len(self._inputs)])) - if outputs is None: - outputs = outs - else: - outputs = [ - np.vstack([x, outs[i]]) for i, x in enumerate(outputs) - ] + # NOTE: for LoDTensor output, we should not stack outputs, + # as stacking may lose its detail info + outputs = list(zip(*outputs)) + if stack_outputs: + outputs = [np.stack(outs, axis=0) for outs in outputs] self._test_dataloader = None if test_loader is not None and self._adapter._nranks > 1 \ @@ -1161,8 +1191,8 @@ """ Args: eval_data (Dataset|DataLoader|None): An iterable data loader is used for - eval. An instance of paddle.fluid.io.Dataset or - paddle.fluid.io.Dataloader is recomended. + eval. An instance of paddle.io.Dataset or + paddle.io.DataLoader is recommended. """ assert isinstance( eval_data, diff --git a/hapi/text/bert/dataloader.py b/hapi/text/bert/dataloader.py index 0f5384b27e8a1539ae24205fbb5080a797608eb7..2cbddac1d266c8ebb26d96f4a3f2a8e81781c562 100644 --- a/hapi/text/bert/dataloader.py +++ b/hapi/text/bert/dataloader.py @@ -25,7 +25,7 @@ from functools import partial import numpy as np import paddle.fluid as fluid from paddle.fluid.dygraph.parallel import ParallelEnv -from paddle.fluid.io import BatchSampler, DataLoader, Dataset +from paddle.io import BatchSampler, DataLoader, Dataset from hapi.distributed import DistributedBatchSampler from hapi.text.bert.data_processor import DataProcessor, XnliProcessor, ColaProcessor, MrpcProcessor, MnliProcessor from hapi.text.bert.batching import prepare_batch_data diff --git a/vision/__init__.py b/hapi/vision/__init__.py similarity index 100% rename from vision/__init__.py rename to hapi/vision/__init__.py diff --git a/hapi/vision/models/__init__.py b/hapi/vision/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..25148ff23567b8f7a01f44f15248c80fd05e585f --- /dev/null +++ b/hapi/vision/models/__init__.py @@ -0,0 +1,40 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from . import resnet +from . import vgg +from . import mobilenetv1 +from . import mobilenetv2 +from . import darknet +from . import yolov3 +from . import tsm +from . import bmn + +from .resnet import * +from .mobilenetv1 import * +from .mobilenetv2 import * +from .vgg import * +from .darknet import * +from .yolov3 import * +from .tsm import * +from .bmn import * + +__all__ = resnet.__all__ \ + + vgg.__all__ \ + + mobilenetv1.__all__ \ + + mobilenetv2.__all__ \ + + darknet.__all__ \ + + yolov3.__all__ \ + + tsm.__all__ \ + + bmn.__all__ diff --git a/bmn/bmn_model.py b/hapi/vision/models/bmn.py similarity index 83% rename from bmn/bmn_model.py rename to hapi/vision/models/bmn.py index dfde7bcc5cdaac8aa5ea3c069f580308b49ec01f..65ce6eaa0b4b2551e3542e623e6193fb25fb32d1 100644 --- a/bmn/bmn_model.py +++ b/hapi/vision/models/bmn.py @@ -17,12 +17,68 @@ from paddle.fluid import ParamAttr import numpy as np import math -from bmn_utils import get_interp1d_mask -from model import Model, Loss +from hapi.model import Model, Loss + +__all__ = ["BMN", "BmnLoss"] DATATYPE = 'float32' +def _get_interp1d_bin_mask(seg_xmin, seg_xmax, tscale, num_sample, + num_sample_perbin): + """ generate sample mask for a boundary-matching pair """ + plen = float(seg_xmax - seg_xmin) + plen_sample = plen / (num_sample * num_sample_perbin - 1.0) + total_samples = [ + seg_xmin + plen_sample * ii + for ii in range(num_sample * num_sample_perbin) + ] + p_mask = [] + for idx in range(num_sample): + bin_samples = total_samples[idx * num_sample_perbin:(idx + 1) * + num_sample_perbin] + bin_vector = np.zeros([tscale]) + for sample in bin_samples: + sample_upper = math.ceil(sample) + sample_decimal, sample_down = math.modf(sample) + if int(sample_down) <= (tscale - 1) and int(sample_down) >= 0: + bin_vector[int(sample_down)] += 1 - sample_decimal + if int(sample_upper) <= (tscale - 1) and int(sample_upper) >= 0: + bin_vector[int(sample_upper)] += sample_decimal + bin_vector = 1.0 / num_sample_perbin * bin_vector + p_mask.append(bin_vector) + p_mask = np.stack(p_mask, axis=1) + return p_mask + + +def get_interp1d_mask(tscale, dscale, prop_boundary_ratio, num_sample, + num_sample_perbin): + """ generate sample mask for each point in Boundary-Matching Map """ + mask_mat = [] + for start_index in range(tscale): + mask_mat_vector = [] + for duration_index in range(dscale): + if start_index + duration_index < tscale: + p_xmin = start_index + p_xmax = start_index + duration_index + center_len = float(p_xmax - p_xmin) + 1 + sample_xmin = p_xmin - center_len * prop_boundary_ratio + sample_xmax = p_xmax + center_len * prop_boundary_ratio + p_mask = _get_interp1d_bin_mask(sample_xmin, sample_xmax, + tscale, num_sample, + num_sample_perbin) + else: + p_mask = np.zeros([tscale, num_sample]) + mask_mat_vector.append(p_mask) + mask_mat_vector = np.stack(mask_mat_vector, axis=2) + mask_mat.append(mask_mat_vector) + mask_mat = np.stack(mask_mat, axis=3) + mask_mat = mask_mat.astype(np.float32) + + sample_mask = np.reshape(mask_mat, [tscale, -1]) + return sample_mask + + # Net class Conv1D(fluid.dygraph.Layer): def __init__(self, diff --git 
a/hapi/vision/models/darknet.py b/hapi/vision/models/darknet.py new file mode 100755 index 0000000000000000000000000000000000000000..85e25f4e1205ea62ec878409d640ba42e7335ee2 --- /dev/null +++ b/hapi/vision/models/darknet.py @@ -0,0 +1,219 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.regularizer import L2Decay + +from paddle.fluid.dygraph.nn import Conv2D, BatchNorm + +from hapi.model import Model +from hapi.download import get_weights_path + +__all__ = ['DarkNet', 'ConvBNLayer', 'darknet53'] + +# {num_layers: (url, md5)} +pretrain_infos = { + 53: ('https://paddlemodels.bj.bcebos.com/hapi/darknet53.pdparams', + '2506357a5c31e865785112fc614a487d') +} + + +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + ch_in, + ch_out, + filter_size=3, + stride=1, + groups=1, + padding=0, + act="leaky"): + super(ConvBNLayer, self).__init__() + + self.conv = Conv2D( + num_channels=ch_in, + num_filters=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=groups, + param_attr=ParamAttr( + initializer=fluid.initializer.Normal(0., 0.02)), + bias_attr=False, + act=None) + self.batch_norm = BatchNorm( + num_channels=ch_out, + param_attr=ParamAttr( + initializer=fluid.initializer.Normal(0., 0.02), + regularizer=L2Decay(0.)), + bias_attr=ParamAttr( + initializer=fluid.initializer.Constant(0.0), + regularizer=L2Decay(0.))) + + self.act = act + + def forward(self, inputs): + out = self.conv(inputs) + out = self.batch_norm(out) + if self.act == 'leaky': + out = fluid.layers.leaky_relu(x=out, alpha=0.1) + return out + +class DownSample(fluid.dygraph.Layer): + def __init__(self, + ch_in, + ch_out, + filter_size=3, + stride=2, + padding=1): + + super(DownSample, self).__init__() + + self.conv_bn_layer = ConvBNLayer( + ch_in=ch_in, + ch_out=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding) + self.ch_out = ch_out + def forward(self, inputs): + out = self.conv_bn_layer(inputs) + return out + +class BasicBlock(fluid.dygraph.Layer): + def __init__(self, ch_in, ch_out): + super(BasicBlock, self).__init__() + + self.conv1 = ConvBNLayer( + ch_in=ch_in, + ch_out=ch_out, + filter_size=1, + stride=1, + padding=0) + self.conv2 = ConvBNLayer( + ch_in=ch_out, + ch_out=ch_out*2, + filter_size=3, + stride=1, + padding=1) + def forward(self, inputs): + conv1 = self.conv1(inputs) + conv2 = self.conv2(conv1) + out = fluid.layers.elementwise_add(x=inputs, y=conv2, act=None) + return out + +class LayerWarp(fluid.dygraph.Layer): + def __init__(self, ch_in, ch_out, count): + super(LayerWarp,self).__init__() + + self.basicblock0 = BasicBlock(ch_in, ch_out) + self.res_out_list = [] + for i in range(1,count): + res_out = self.add_sublayer("basic_block_%d" % (i), + BasicBlock( + ch_out*2, + ch_out)) + self.res_out_list.append(res_out) + self.ch_out = ch_out + def forward(self,inputs): + y = self.basicblock0(inputs) + for basic_block_i 
in self.res_out_list: + y = basic_block_i(y) + return y + + +DarkNet_cfg = {53: ([1, 2, 8, 8, 4])} + + +class DarkNet(Model): + """DarkNet model from + `"YOLOv3: An Incremental Improvement" <https://arxiv.org/abs/1804.02767>`_ + + Args: + num_layers (int): layer number of DarkNet, only 53 supported currently, default: 53. + ch_in (int): channel number of input data, default 3. + """ + + def __init__(self, num_layers=53, ch_in=3): + super(DarkNet, self).__init__() + assert num_layers in DarkNet_cfg.keys(), \ + "only support num_layers in {} currently" \ + .format(DarkNet_cfg.keys()) + self.stages = DarkNet_cfg[num_layers] + self.stages = self.stages[0:5] + + self.conv0 = ConvBNLayer( + ch_in=ch_in, + ch_out=32, + filter_size=3, + stride=1, + padding=1) + + self.downsample0 = DownSample( + ch_in=32, + ch_out=32 * 2) + self.darknet53_conv_block_list = [] + self.downsample_list = [] + ch_in = [64,128,256,512,1024] + for i, stage in enumerate(self.stages): + conv_block = self.add_sublayer( + "stage_%d" % (i), + LayerWarp( + int(ch_in[i]), + 32*(2**i), + stage)) + self.darknet53_conv_block_list.append(conv_block) + for i in range(len(self.stages) - 1): + downsample = self.add_sublayer( + "stage_%d_downsample" % i, + DownSample( + ch_in = 32*(2**(i+1)), + ch_out = 32*(2**(i+2)))) + self.downsample_list.append(downsample) + + def forward(self,inputs): + + out = self.conv0(inputs) + out = self.downsample0(out) + blocks = [] + for i, conv_block_i in enumerate(self.darknet53_conv_block_list): + out = conv_block_i(out) + blocks.append(out) + if i < len(self.stages) - 1: + out = self.downsample_list[i](out) + return blocks[-1:-4:-1] + + +def _darknet(num_layers=53, input_channels=3, pretrained=True): + model = DarkNet(num_layers, input_channels) + if pretrained: + assert num_layers in pretrain_infos.keys(), \ + "DarkNet{} does not have pretrained weights now, " \ + "pretrained should be set to False".format(num_layers) + weight_path = get_weights_path(*(pretrain_infos[num_layers])) + assert weight_path.endswith('.pdparams'), \ + "suffix of weight must be .pdparams" + model.load(weight_path[:-9]) + return model + + +def darknet53(input_channels=3, pretrained=True): + """DarkNet 53-layer model + + Args: + input_channels (int): channel number of input data, default 3. + pretrained (bool): If True, returns a model pre-trained on ImageNet, + default True. + """ + return _darknet(53, input_channels, pretrained) diff --git a/hapi/vision/models/mobilenetv1.py b/hapi/vision/models/mobilenetv1.py new file mode 100644 index 0000000000000000000000000000000000000000..ff27cb9c5d7745361858c3f6ec13e5865fafa605 --- /dev/null +++ b/hapi/vision/models/mobilenetv1.py @@ -0,0 +1,287 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +import numpy as np +import paddle +import paddle.fluid as fluid +from paddle.fluid.initializer import MSRA +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear + +from hapi.model import Model +from hapi.download import get_weights_path + +__all__ = ['MobileNetV1', 'mobilenet_v1'] + +model_urls = { + 'mobilenetv1_1.0': + ('https://paddle-hapi.bj.bcebos.com/models/mobilenet_v1_x1.0.pdparams', + 'bf0d25cb0bed1114d9dac9384ce2b4a6') +} + + +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + num_channels, + filter_size, + num_filters, + stride, + padding, + channels=None, + num_groups=1, + act='relu', + use_cudnn=True, + name=None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + act=None, + use_cudnn=use_cudnn, + param_attr=ParamAttr( + initializer=MSRA(), name=self.full_name() + "_weights"), + bias_attr=False) + + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=self.full_name() + "_bn" + "_scale"), + bias_attr=ParamAttr(name=self.full_name() + "_bn" + "_offset"), + moving_mean_name=self.full_name() + "_bn" + '_mean', + moving_variance_name=self.full_name() + "_bn" + '_variance') + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class DepthwiseSeparable(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters1, + num_filters2, + num_groups, + stride, + scale, + name=None): + super(DepthwiseSeparable, self).__init__() + + self._depthwise_conv = ConvBNLayer( + num_channels=num_channels, + num_filters=int(num_filters1 * scale), + filter_size=3, + stride=stride, + padding=1, + num_groups=int(num_groups * scale), + use_cudnn=False) + + self._pointwise_conv = ConvBNLayer( + num_channels=int(num_filters1 * scale), + filter_size=1, + num_filters=int(num_filters2 * scale), + stride=1, + padding=0) + + def forward(self, inputs): + y = self._depthwise_conv(inputs) + y = self._pointwise_conv(y) + return y + + +class MobileNetV1(Model): + """MobileNetV1 model from + `"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" `_. + + Args: + scale (float): scale of channels in each layer. Default: 1.0. + num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer + will not be defined. Default: 1000. + with_pool (bool): use pool before the last fc layer or not. Default: True. + classifier_activation (str): activation for the last fc layer. Default: 'softmax'. 
+ """ + + def __init__(self, + scale=1.0, + num_classes=1000, + with_pool=True, + classifier_activation='softmax'): + super(MobileNetV1, self).__init__() + self.scale = scale + self.dwsl = [] + self.num_classes = num_classes + self.with_pool = with_pool + + self.conv1 = ConvBNLayer( + num_channels=3, + filter_size=3, + channels=3, + num_filters=int(32 * scale), + stride=2, + padding=1) + + dws21 = self.add_sublayer( + sublayer=DepthwiseSeparable( + num_channels=int(32 * scale), + num_filters1=32, + num_filters2=64, + num_groups=32, + stride=1, + scale=scale), + name="conv2_1") + self.dwsl.append(dws21) + + dws22 = self.add_sublayer( + sublayer=DepthwiseSeparable( + num_channels=int(64 * scale), + num_filters1=64, + num_filters2=128, + num_groups=64, + stride=2, + scale=scale), + name="conv2_2") + self.dwsl.append(dws22) + + dws31 = self.add_sublayer( + sublayer=DepthwiseSeparable( + num_channels=int(128 * scale), + num_filters1=128, + num_filters2=128, + num_groups=128, + stride=1, + scale=scale), + name="conv3_1") + self.dwsl.append(dws31) + + dws32 = self.add_sublayer( + sublayer=DepthwiseSeparable( + num_channels=int(128 * scale), + num_filters1=128, + num_filters2=256, + num_groups=128, + stride=2, + scale=scale), + name="conv3_2") + self.dwsl.append(dws32) + + dws41 = self.add_sublayer( + sublayer=DepthwiseSeparable( + num_channels=int(256 * scale), + num_filters1=256, + num_filters2=256, + num_groups=256, + stride=1, + scale=scale), + name="conv4_1") + self.dwsl.append(dws41) + + dws42 = self.add_sublayer( + sublayer=DepthwiseSeparable( + num_channels=int(256 * scale), + num_filters1=256, + num_filters2=512, + num_groups=256, + stride=2, + scale=scale), + name="conv4_2") + self.dwsl.append(dws42) + + for i in range(5): + tmp = self.add_sublayer( + sublayer=DepthwiseSeparable( + num_channels=int(512 * scale), + num_filters1=512, + num_filters2=512, + num_groups=512, + stride=1, + scale=scale), + name="conv5_" + str(i + 1)) + self.dwsl.append(tmp) + + dws56 = self.add_sublayer( + sublayer=DepthwiseSeparable( + num_channels=int(512 * scale), + num_filters1=512, + num_filters2=1024, + num_groups=512, + stride=2, + scale=scale), + name="conv5_6") + self.dwsl.append(dws56) + + dws6 = self.add_sublayer( + sublayer=DepthwiseSeparable( + num_channels=int(1024 * scale), + num_filters1=1024, + num_filters2=1024, + num_groups=1024, + stride=1, + scale=scale), + name="conv6") + self.dwsl.append(dws6) + + if with_pool: + self.pool2d_avg = Pool2D(pool_type='avg', global_pooling=True) + + if num_classes > -1: + self.out = Linear( + int(1024 * scale), + num_classes, + act=classifier_activation, + param_attr=ParamAttr( + initializer=MSRA(), name=self.full_name() + "fc7_weights"), + bias_attr=ParamAttr(name="fc7_offset")) + + def forward(self, inputs): + y = self.conv1(inputs) + for dws in self.dwsl: + y = dws(y) + + if self.with_pool: + y = self.pool2d_avg(y) + + if self.num_classes > 0: + y = fluid.layers.reshape(y, shape=[-1, 1024]) + y = self.out(y) + return y + + +def _mobilenet(arch, pretrained=False, **kwargs): + model = MobileNetV1(num_classes=1000, with_pool=True, **kwargs) + if pretrained: + assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format( + arch) + weight_path = get_weights_path(model_urls[arch][0], + model_urls[arch][1]) + assert weight_path.endswith( + '.pdparams'), "suffix of weight must be .pdparams" + model.load(weight_path[:-9]) + + return model + + +def mobilenet_v1(pretrained=False, scale=1.0): + """MobileNetV1 + + 
Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False. + scale: (float): scale of channels in each layer. Default: 1.0. + """ + model = _mobilenet('mobilenetv1_' + str(scale), pretrained, scale=scale) + return model diff --git a/hapi/vision/models/mobilenetv2.py b/hapi/vision/models/mobilenetv2.py new file mode 100644 index 0000000000000000000000000000000000000000..02db68e569cea06dac876dd3b7bc044cd15542f7 --- /dev/null +++ b/hapi/vision/models/mobilenetv2.py @@ -0,0 +1,261 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear + +from hapi.model import Model +from hapi.download import get_weights_path + +__all__ = ['MobileNetV2', 'mobilenet_v2'] + +model_urls = { + 'mobilenetv2_1.0': + ('https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams', + '8ff74f291f72533f2a7956a4efff9d88') +} + + +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + num_channels, + filter_size, + num_filters, + stride, + padding, + channels=None, + num_groups=1, + use_cudnn=True): + super(ConvBNLayer, self).__init__() + + tmp_param = ParamAttr(name=self.full_name() + "_weights") + self._conv = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + act=None, + use_cudnn=use_cudnn, + param_attr=tmp_param, + bias_attr=False) + + self._batch_norm = BatchNorm( + num_filters, + param_attr=ParamAttr(name=self.full_name() + "_bn" + "_scale"), + bias_attr=ParamAttr(name=self.full_name() + "_bn" + "_offset"), + moving_mean_name=self.full_name() + "_bn" + '_mean', + moving_variance_name=self.full_name() + "_bn" + '_variance') + + def forward(self, inputs, if_act=True): + y = self._conv(inputs) + y = self._batch_norm(y) + if if_act: + y = fluid.layers.relu6(y) + return y + + +class InvertedResidualUnit(fluid.dygraph.Layer): + def __init__( + self, + num_channels, + num_in_filter, + num_filters, + stride, + filter_size, + padding, + expansion_factor, ): + super(InvertedResidualUnit, self).__init__() + num_expfilter = int(round(num_in_filter * expansion_factor)) + self._expand_conv = ConvBNLayer( + num_channels=num_channels, + num_filters=num_expfilter, + filter_size=1, + stride=1, + padding=0, + num_groups=1) + + self._bottleneck_conv = ConvBNLayer( + num_channels=num_expfilter, + num_filters=num_expfilter, + filter_size=filter_size, + stride=stride, + padding=padding, + num_groups=num_expfilter, + use_cudnn=False) + + self._linear_conv = ConvBNLayer( + num_channels=num_expfilter, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + num_groups=1) + + def forward(self, inputs, ifshortcut): + y = self._expand_conv(inputs, if_act=True) + y = self._bottleneck_conv(y, if_act=True) + y = self._linear_conv(y, if_act=False) + 
if ifshortcut: + y = fluid.layers.elementwise_add(inputs, y) + return y + + +class InvresiBlocks(fluid.dygraph.Layer): + def __init__(self, in_c, t, c, n, s): + super(InvresiBlocks, self).__init__() + + self._first_block = InvertedResidualUnit( + num_channels=in_c, + num_in_filter=in_c, + num_filters=c, + stride=s, + filter_size=3, + padding=1, + expansion_factor=t) + + self._inv_blocks = [] + for i in range(1, n): + tmp = self.add_sublayer( + sublayer=InvertedResidualUnit( + num_channels=c, + num_in_filter=c, + num_filters=c, + stride=1, + filter_size=3, + padding=1, + expansion_factor=t), + name=self.full_name() + "_" + str(i + 1)) + self._inv_blocks.append(tmp) + + def forward(self, inputs): + y = self._first_block(inputs, ifshortcut=False) + for inv_block in self._inv_blocks: + y = inv_block(y, ifshortcut=True) + return y + + +class MobileNetV2(Model): + """MobileNetV2 model from + `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_. + + Args: + scale (float): scale of channels in each layer. Default: 1.0. + num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer + will not be defined. Default: 1000. + with_pool (bool): use pool before the last fc layer or not. Default: True. + classifier_activation (str): activation for the last fc layer. Default: 'softmax'. + """ + + def __init__(self, + scale=1.0, + num_classes=1000, + with_pool=True, + classifier_activation='softmax'): + super(MobileNetV2, self).__init__() + self.scale = scale + self.num_classes = num_classes + self.with_pool = with_pool + + bottleneck_params_list = [ + (1, 16, 1, 1), + (6, 24, 2, 2), + (6, 32, 3, 2), + (6, 64, 4, 2), + (6, 96, 3, 1), + (6, 160, 3, 2), + (6, 320, 1, 1), + ] + + self._conv1 = ConvBNLayer( + num_channels=3, + num_filters=int(32 * scale), + filter_size=3, + stride=2, + padding=1) + + self._invl = [] + i = 1 + in_c = int(32 * scale) + for layer_setting in bottleneck_params_list: + t, c, n, s = layer_setting + i += 1 + tmp = self.add_sublayer( + sublayer=InvresiBlocks( + in_c=in_c, t=t, c=int(c * scale), n=n, s=s), + name='conv' + str(i)) + self._invl.append(tmp) + in_c = int(c * scale) + + self._out_c = int(1280 * scale) if scale > 1.0 else 1280 + self._conv9 = ConvBNLayer( + num_channels=in_c, + num_filters=self._out_c, + filter_size=1, + stride=1, + padding=0) + + if with_pool: + self._pool2d_avg = Pool2D(pool_type='avg', global_pooling=True) + + if num_classes > 0: + tmp_param = ParamAttr(name=self.full_name() + "fc10_weights") + self._fc = Linear( + self._out_c, + num_classes, + act=classifier_activation, + param_attr=tmp_param, + bias_attr=ParamAttr(name="fc10_offset")) + + def forward(self, inputs): + y = self._conv1(inputs, if_act=True) + for inv in self._invl: + y = inv(y) + y = self._conv9(y, if_act=True) + + if self.with_pool: + y = self._pool2d_avg(y) + if self.num_classes > 0: + y = fluid.layers.reshape(y, shape=[-1, self._out_c]) + y = self._fc(y) + return y + + +def _mobilenet(arch, pretrained=False, **kwargs): + model = MobileNetV2(num_classes=1000, with_pool=True, **kwargs) + if pretrained: + assert arch in model_urls, "{} model does not have a pretrained model now, you should set pretrained=False".format( + arch) + weight_path = get_weights_path(model_urls[arch][0], + model_urls[arch][1]) + assert weight_path.endswith( + '.pdparams'), "suffix of weight must be .pdparams" + model.load(weight_path[:-9]) + + return model + + +def mobilenet_v2(pretrained=False, scale=1.0): + """MobileNetV2 + + Args: + pretrained (bool): If True, returns a model pre-trained
on ImageNet. Default: False. + scale: (float): scale of channels in each layer. Default: 1.0. + """ + model = _mobilenet('mobilenetv2_' + str(scale), pretrained, scale=scale) + return model diff --git a/hapi/vision/models/resnet.py b/hapi/vision/models/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..804cc3534ad4c3cda4f800b41d8567922450e037 --- /dev/null +++ b/hapi/vision/models/resnet.py @@ -0,0 +1,310 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import division +from __future__ import print_function + +import math +import paddle.fluid as fluid + +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear +from paddle.fluid.dygraph.container import Sequential + +from hapi.model import Model +from hapi.download import get_weights_path + +__all__ = [ + 'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152' +] + +model_urls = { + 'resnet50': ('https://paddle-hapi.bj.bcebos.com/models/resnet50.pdparams', + '0884c9087266496c41c60d14a96f8530') +} + + +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act=None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + bias_attr=False) + + self._batch_norm = BatchNorm(num_filters, act=act) + + def forward(self, inputs): + x = self._conv(inputs) + x = self._batch_norm(x) + + return x + + +class BasicBlock(fluid.dygraph.Layer): + + expansion = 1 + + def __init__(self, num_channels, num_filters, stride, shortcut=True): + super(BasicBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + act='relu') + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu') + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=stride) + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = short + conv1 + + return fluid.layers.relu(y) + + +class BottleneckBlock(fluid.dygraph.Layer): + + expansion = 4 + + def __init__(self, num_channels, num_filters, stride, shortcut=True): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act='relu') + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu') + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * self.expansion, + filter_size=1, + 
diff --git a/hapi/vision/models/resnet.py b/hapi/vision/models/resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..804cc3534ad4c3cda4f800b41d8567922450e037
--- /dev/null
+++ b/hapi/vision/models/resnet.py
@@ -0,0 +1,310 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+from __future__ import print_function
+
+import math
+import paddle.fluid as fluid
+
+from paddle.fluid.layer_helper import LayerHelper
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
+from paddle.fluid.dygraph.container import Sequential
+
+from hapi.model import Model
+from hapi.download import get_weights_path
+
+__all__ = [
+    'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152'
+]
+
+model_urls = {
+    'resnet50': ('https://paddle-hapi.bj.bcebos.com/models/resnet50.pdparams',
+                 '0884c9087266496c41c60d14a96f8530')
+}
+
+
+class ConvBNLayer(fluid.dygraph.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 act=None):
+        super(ConvBNLayer, self).__init__()
+
+        self._conv = Conv2D(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            act=None,
+            bias_attr=False)
+
+        self._batch_norm = BatchNorm(num_filters, act=act)
+
+    def forward(self, inputs):
+        x = self._conv(inputs)
+        x = self._batch_norm(x)
+
+        return x
+
+
+class BasicBlock(fluid.dygraph.Layer):
+
+    expansion = 1
+
+    def __init__(self, num_channels, num_filters, stride, shortcut=True):
+        super(BasicBlock, self).__init__()
+
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=3,
+            act='relu')
+        self.conv1 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act='relu')
+
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters,
+                filter_size=1,
+                stride=stride)
+
+        self.shortcut = shortcut
+
+    def forward(self, inputs):
+        y = self.conv0(inputs)
+        conv1 = self.conv1(y)
+
+        if self.shortcut:
+            short = inputs
+        else:
+            short = self.short(inputs)
+
+        y = short + conv1
+
+        return fluid.layers.relu(y)
+
+
+class BottleneckBlock(fluid.dygraph.Layer):
+
+    expansion = 4
+
+    def __init__(self, num_channels, num_filters, stride, shortcut=True):
+        super(BottleneckBlock, self).__init__()
+
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=1,
+            act='relu')
+        self.conv1 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act='relu')
+        self.conv2 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters * self.expansion,
+            filter_size=1,
+            act=None)
+
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters * self.expansion,
+                filter_size=1,
+                stride=stride)
+
+        self.shortcut = shortcut
+
+        self._num_channels_out = num_filters * self.expansion
+
+    def forward(self, inputs):
+        x = self.conv0(inputs)
+        conv1 = self.conv1(x)
+        conv2 = self.conv2(conv1)
+
+        if self.shortcut:
+            short = inputs
+        else:
+            short = self.short(inputs)
+
+        x = fluid.layers.elementwise_add(x=short, y=conv2)
+
+        return fluid.layers.relu(x)
+
+
+class ResNet(Model):
+    """ResNet model from
+    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/abs/1512.03385>`_
+
+    Args:
+        Block (BasicBlock|BottleneckBlock): block module of model.
+        depth (int): layers of resnet. Default: 50.
+        num_classes (int): output dim of the last fc layer. If num_classes <= 0,
+            the last fc layer will not be defined. Default: 1000.
+        with_pool (bool): use pooling before the last fc layer or not. Default: True.
+        classifier_activation (str): activation for the last fc layer. Default: 'softmax'.
+    """
+
+    def __init__(self,
+                 Block,
+                 depth=50,
+                 num_classes=1000,
+                 with_pool=True,
+                 classifier_activation='softmax'):
+        super(ResNet, self).__init__()
+
+        self.num_classes = num_classes
+        self.with_pool = with_pool
+
+        layer_config = {
+            18: [2, 2, 2, 2],
+            34: [3, 4, 6, 3],
+            50: [3, 4, 6, 3],
+            101: [3, 4, 23, 3],
+            152: [3, 8, 36, 3],
+        }
+        assert depth in layer_config.keys(), \
+            "supported depths are {}, but got {}".format(
+                layer_config.keys(), depth)
+
+        layers = layer_config[depth]
+
+        in_channels = 64
+        out_channels = [64, 128, 256, 512]
+
+        self.conv = ConvBNLayer(
+            num_channels=3,
+            num_filters=64,
+            filter_size=7,
+            stride=2,
+            act='relu')
+        self.pool = Pool2D(
+            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
+
+        self.layers = []
+        for idx, num_blocks in enumerate(layers):
+            blocks = []
+            shortcut = False
+            for b in range(num_blocks):
+                if b == 1:
+                    in_channels = out_channels[idx] * Block.expansion
+                block = Block(
+                    num_channels=in_channels,
+                    num_filters=out_channels[idx],
+                    stride=2 if b == 0 and idx != 0 else 1,
+                    shortcut=shortcut)
+                blocks.append(block)
+                shortcut = True
+            layer = self.add_sublayer("layer_{}".format(idx),
+                                      Sequential(*blocks))
+            self.layers.append(layer)
+
+        if with_pool:
+            self.global_pool = Pool2D(
+                pool_size=7, pool_type='avg', global_pooling=True)
+
+        if num_classes > 0:
+            stdv = 1.0 / math.sqrt(out_channels[-1] * Block.expansion * 1.0)
+            self.fc_input_dim = out_channels[-1] * Block.expansion * 1 * 1
+            self.fc = Linear(
+                self.fc_input_dim,
+                num_classes,
+                act=classifier_activation,
+                param_attr=fluid.param_attr.ParamAttr(
+                    initializer=fluid.initializer.Uniform(-stdv, stdv)))
+
+    def forward(self, inputs):
+        x = self.conv(inputs)
+        x = self.pool(x)
+        for layer in self.layers:
+            x = layer(x)
+
+        if self.with_pool:
+            x = self.global_pool(x)
+
+        if self.num_classes > 0:
+            x = fluid.layers.reshape(x, shape=[-1, self.fc_input_dim])
+            x = self.fc(x)
+        return x
+
+
+def _resnet(arch, Block, depth, pretrained):
+    model = ResNet(Block, depth, num_classes=1000, with_pool=True)
+    if pretrained:
+        assert arch in model_urls, "{} model does not have a pretrained model now, pretrained should be set to False".format(
+            arch)
+        weight_path = get_weights_path(model_urls[arch][0],
+                                       model_urls[arch][1])
+        assert weight_path.endswith(
+            '.pdparams'), "suffix of weight must be .pdparams"
+        model.load(weight_path[:-9])
+    return model
+
+
+def resnet18(pretrained=False):
+    """ResNet 18-layer model
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    return _resnet('resnet18', BasicBlock, 18, pretrained)
+
+
+def resnet34(pretrained=False):
+    """ResNet 34-layer model
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    return _resnet('resnet34', BasicBlock, 34, pretrained)
+
+
+def resnet50(pretrained=False):
+    """ResNet 50-layer model
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    return _resnet('resnet50', BottleneckBlock, 50, pretrained)
+
+
+def resnet101(pretrained=False):
+    """ResNet 101-layer model
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    return _resnet('resnet101', BottleneckBlock, 101, pretrained)
+
+
+def resnet152(pretrained=False):
+    """ResNet 152-layer model
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    return _resnet('resnet152', BottleneckBlock, 152, pretrained)
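The variant factories are thin wrappers over `_resnet`; a hedged sketch of using one (it assumes `resnet50` is exported from `hapi.vision.models`):

    import numpy as np
    from paddle import fluid
    from hapi.vision.models import resnet50

    with fluid.dygraph.guard():
        model = resnet50(pretrained=False)
        x = fluid.dygraph.to_variable(
            np.random.randn(2, 3, 224, 224).astype('float32'))
        out = model(x)  # (2, 1000); BottleneckBlock.expansion = 4 makes
                        # the final feature width 512 * 4 = 2048 before fc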
diff --git a/hapi/vision/models/tsm.py b/hapi/vision/models/tsm.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b50f7073ee6e229acf4953c778ef60e2815cdb8
--- /dev/null
+++ b/hapi/vision/models/tsm.py
@@ -0,0 +1,212 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import paddle.fluid as fluid
+from paddle.fluid.layer_helper import LayerHelper
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
+
+from hapi.model import Model
+from hapi.download import get_weights_path
+
+__all__ = ["TSM_ResNet", "tsm_resnet50"]
+
+# {num_layers: (url, md5)}
+pretrain_infos = {
+    50: ('https://paddlemodels.bj.bcebos.com/hapi/tsm_resnet50.pdparams',
+         '5755dc538e422589f417f7b38d7cc3c7')
+}
+
+
+class ConvBNLayer(fluid.dygraph.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 act=None):
+        super(ConvBNLayer, self).__init__()
+
+        self._conv = Conv2D(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            act=None,
+            param_attr=fluid.param_attr.ParamAttr(),
+            bias_attr=False)
+
+        self._batch_norm = BatchNorm(
+            num_filters,
+            act=act,
+            param_attr=fluid.param_attr.ParamAttr(),
+            bias_attr=fluid.param_attr.ParamAttr())
+
+    def forward(self, inputs):
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+
+        return y
+
+
+class BottleneckBlock(fluid.dygraph.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 stride,
+                 shortcut=True,
+                 seg_num=8):
+        super(BottleneckBlock, self).__init__()
+
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=1,
+            act='relu')
+        self.conv1 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act='relu')
+        self.conv2 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters * 4,
+            filter_size=1,
+            act=None)
+
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters * 4,
+                filter_size=1,
+                stride=stride)
+        self.shortcut = shortcut
+        self.seg_num = seg_num
+        self._num_channels_out = int(num_filters * 4)
+
+    def forward(self, inputs):
+        shifts = fluid.layers.temporal_shift(inputs, self.seg_num, 1.0 / 8)
+        y = self.conv0(shifts)
+        conv1 = self.conv1(y)
+        conv2 = self.conv2(conv1)
+        if self.shortcut:
+            short = inputs
+        else:
+            short = self.short(inputs)
+        y = fluid.layers.elementwise_add(x=short, y=conv2, act="relu")
+        return y
+
+
+class TSM_ResNet(Model):
+    """
+    TSM network with ResNet as backbone
+
+    Args:
+        num_layers (int): ResNet layer number; only 50 is supported
+            currently. Default 50.
+        seg_num (int): segment number of each video sample. Default 8.
+        num_classes (int): video class number. Default 400.
+    """
+
+    def __init__(self, num_layers=50, seg_num=8, num_classes=400):
+        super(TSM_ResNet, self).__init__()
+
+        self.layers = num_layers
+        self.seg_num = seg_num
+        self.class_dim = num_classes
+
+        if self.layers == 50:
+            depth = [3, 4, 6, 3]
+        else:
+            raise NotImplementedError
+        num_filters = [64, 128, 256, 512]
+
+        self.conv = ConvBNLayer(
+            num_channels=3, num_filters=64, filter_size=7, stride=2,
+            act='relu')
+        self.pool2d_max = Pool2D(
+            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
+
+        self.bottleneck_block_list = []
+        num_channels = 64
+
+        for block in range(len(depth)):
+            shortcut = False
+            for i in range(depth[block]):
+                bottleneck_block = self.add_sublayer(
+                    'bb_%d_%d' % (block, i),
+                    BottleneckBlock(
+                        num_channels=num_channels,
+                        num_filters=num_filters[block],
+                        stride=2 if i == 0 and block != 0 else 1,
+                        shortcut=shortcut,
+                        seg_num=self.seg_num))
+                num_channels = int(bottleneck_block._num_channels_out)
+                self.bottleneck_block_list.append(bottleneck_block)
+                shortcut = True
+        self.pool2d_avg = Pool2D(
+            pool_size=7, pool_type='avg', global_pooling=True)
+
+        stdv = 1.0 / math.sqrt(2048 * 1.0)
+
+        self.out = Linear(
+            2048,
+            self.class_dim,
+            act="softmax",
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv)),
+            bias_attr=fluid.param_attr.ParamAttr(
+                learning_rate=2.0,
+                regularizer=fluid.regularizer.L2Decay(0.)))
+
+    def forward(self, inputs):
+        y = fluid.layers.reshape(
+            inputs, [-1, inputs.shape[2], inputs.shape[3], inputs.shape[4]])
+        y = self.conv(y)
+        y = self.pool2d_max(y)
+        for bottleneck_block in self.bottleneck_block_list:
+            y = bottleneck_block(y)
+        y = self.pool2d_avg(y)
+        y = fluid.layers.dropout(y, dropout_prob=0.5)
+        y = fluid.layers.reshape(y, [-1, self.seg_num, y.shape[1]])
+        y = fluid.layers.reduce_mean(y, dim=1)
+        y = fluid.layers.reshape(y, shape=[-1, 2048])
+        y = self.out(y)
+        return y
+
+
+def _tsm_resnet(num_layers, seg_num=8, num_classes=400, pretrained=True):
+    model = TSM_ResNet(num_layers, seg_num, num_classes)
+    if pretrained:
+        assert num_layers in pretrain_infos.keys(), \
+            "TSM-ResNet{} does not have pretrained weights now, " \
+            "pretrained should be set as False".format(num_layers)
+        weight_path = get_weights_path(*(pretrain_infos[num_layers]))
+        assert weight_path.endswith('.pdparams'), \
+            "suffix of weight must be .pdparams"
+        model.load(weight_path[:-9])
+    return model
+
+
+def tsm_resnet50(seg_num=8, num_classes=400, pretrained=True):
+    """TSM model with 50-layer ResNet as backbone
+
+    Args:
+        seg_num (int): segment number of each video sample. Default 8.
+        num_classes (int): video class number. Default 400.
+        pretrained (bool): If True, returns a model pre-trained on
+            Kinetics-400. Default True.
+    """
+    return _tsm_resnet(50, seg_num, num_classes, pretrained)
diff --git a/hapi/vision/models/vgg.py b/hapi/vision/models/vgg.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ef09bd665e4308739651d868203a4a56b14de38
--- /dev/null
+++ b/hapi/vision/models/vgg.py
@@ -0,0 +1,189 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
+from paddle.fluid.dygraph.container import Sequential
+
+from hapi.model import Model
+from hapi.download import get_weights_path
+
+__all__ = [
+    'VGG',
+    'vgg11',
+    'vgg13',
+    'vgg16',
+    'vgg19',
+]
+
+model_urls = {
+    'vgg16': ('https://paddle-hapi.bj.bcebos.com/models/vgg16.pdparams',
+              'c788f453a3b999063e8da043456281ee')
+}
+
+
+class Classifier(fluid.dygraph.Layer):
+    def __init__(self, num_classes, classifier_activation='softmax'):
+        super(Classifier, self).__init__()
+        self.linear1 = Linear(512 * 7 * 7, 4096)
+        self.linear2 = Linear(4096, 4096)
+        self.linear3 = Linear(4096, num_classes, act=classifier_activation)
+
+    def forward(self, x):
+        x = self.linear1(x)
+        x = fluid.layers.relu(x)
+        x = fluid.layers.dropout(x, 0.5)
+        x = self.linear2(x)
+        x = fluid.layers.relu(x)
+        x = fluid.layers.dropout(x, 0.5)
+        out = self.linear3(x)
+        return out
+
+
+class VGG(Model):
+    """VGG model from
+    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/abs/1409.1556>`_
+
+    Args:
+        features (fluid.dygraph.Layer): vgg features, created by the make_layers function.
+        num_classes (int): output dim of the last fc layer. If num_classes <= 0,
+            the last fc layer will not be defined. Default: 1000.
+        classifier_activation (str): activation for the last fc layer. Default: 'softmax'.
+    """
+
+    def __init__(self,
+                 features,
+                 num_classes=1000,
+                 classifier_activation='softmax'):
+        super(VGG, self).__init__()
+        self.features = features
+        self.num_classes = num_classes
+
+        if num_classes > 0:
+            classifier = Classifier(num_classes, classifier_activation)
+            self.classifier = self.add_sublayer("classifier",
+                                                Sequential(classifier))
+
+    def forward(self, x):
+        x = self.features(x)
+
+        if self.num_classes > 0:
+            x = fluid.layers.flatten(x, 1)
+            x = self.classifier(x)
+        return x
+
+
+def make_layers(cfg, batch_norm=False):
+    layers = []
+    in_channels = 3
+
+    for v in cfg:
+        if v == 'M':
+            layers += [Pool2D(pool_size=2, pool_stride=2)]
+        else:
+            if batch_norm:
+                conv2d = Conv2D(in_channels, v, filter_size=3, padding=1)
+                layers += [conv2d, BatchNorm(v, act='relu')]
+            else:
+                conv2d = Conv2D(
+                    in_channels, v, filter_size=3, padding=1, act='relu')
+                layers += [conv2d]
+            in_channels = v
+    return Sequential(*layers)
+
+
+cfgs = {
+    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
+    'B':
+    [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
+    'D': [
+        64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M',
+        512, 512, 512, 'M'
+    ],
+    'E': [
+        64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512,
+        512, 'M', 512, 512, 512, 512, 'M'
+    ],
+}
+
+
+def _vgg(arch, cfg, batch_norm, pretrained, **kwargs):
+    model = VGG(make_layers(
+        cfgs[cfg], batch_norm=batch_norm),
+                num_classes=1000,
+                **kwargs)
+
+    if pretrained:
+        assert arch in model_urls, "{} model does not have a pretrained model now, pretrained should be set to False".format(
+            arch)
+        weight_path = get_weights_path(model_urls[arch][0],
+                                       model_urls[arch][1])
+        assert weight_path.endswith(
+            '.pdparams'), "suffix of weight must be .pdparams"
+        model.load(weight_path[:-9])
+
+    return model
+
+
+def vgg11(pretrained=False, batch_norm=False):
+    """VGG 11-layer model
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
+        batch_norm (bool): If True, returns a model with batch_norm layers. Default: False.
+    """
+    model_name = 'vgg11'
+    if batch_norm:
+        model_name += '_bn'
+    return _vgg(model_name, 'A', batch_norm, pretrained)
+
+
+def vgg13(pretrained=False, batch_norm=False):
+    """VGG 13-layer model
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
+        batch_norm (bool): If True, returns a model with batch_norm layers. Default: False.
+    """
+    model_name = 'vgg13'
+    if batch_norm:
+        model_name += '_bn'
+    return _vgg(model_name, 'B', batch_norm, pretrained)
+
+
+def vgg16(pretrained=False, batch_norm=False):
+    """VGG 16-layer model
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
+        batch_norm (bool): If True, returns a model with batch_norm layers. Default: False.
+    """
+    model_name = 'vgg16'
+    if batch_norm:
+        model_name += '_bn'
+    return _vgg(model_name, 'D', batch_norm, pretrained)
+
+
+def vgg19(pretrained=False, batch_norm=False):
+    """VGG 19-layer model
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
+        batch_norm (bool): If True, returns a model with batch_norm layers. Default: False.
+    """
+    model_name = 'vgg19'
+    if batch_norm:
+        model_name += '_bn'
+    return _vgg(model_name, 'E', batch_norm, pretrained)
+ + """ + + def __init__(self, num_classes=80, model_mode='train'): + super(YOLOv3, self).__init__() + self.num_classes = num_classes + assert str.lower(model_mode) in ['train', 'eval', 'test'], \ + "model_mode should be 'train' 'eval' or 'test', but got " \ + "{}".format(model_mode) + self.model_mode = str.lower(model_mode) + self.anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, + 59, 119, 116, 90, 156, 198, 373, 326] + self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + self.valid_thresh = 0.005 + self.nms_thresh = 0.45 + self.nms_topk = 400 + self.nms_posk = 100 + self.draw_thresh = 0.5 + + self.backbone = darknet53(pretrained=(model_mode=='train')) + self.block_outputs = [] + self.yolo_blocks = [] + self.route_blocks = [] + + for idx, num_chan in enumerate([1024, 768, 384]): + yolo_block = self.add_sublayer( + "yolo_detecton_block_{}".format(idx), + YoloDetectionBlock(num_chan, 512 // (2**idx))) + self.yolo_blocks.append(yolo_block) + + num_filters = len(self.anchor_masks[idx]) * (self.num_classes + 5) + + block_out = self.add_sublayer( + "block_out_{}".format(idx), + Conv2D(num_channels=1024 // (2**idx), + num_filters=num_filters, + filter_size=1, + act=None, + param_attr=ParamAttr( + initializer=fluid.initializer.Normal(0., 0.02)), + bias_attr=ParamAttr( + initializer=fluid.initializer.Constant(0.0), + regularizer=L2Decay(0.)))) + self.block_outputs.append(block_out) + if idx < 2: + route = self.add_sublayer( + "route2_{}".format(idx), + ConvBNLayer(ch_in=512 // (2**idx), + ch_out=256 // (2**idx), + filter_size=1, + act='leaky_relu')) + self.route_blocks.append(route) + + def forward(self, img_id, img_shape, inputs): + outputs = [] + boxes = [] + scores = [] + downsample = 32 + + feats = self.backbone(inputs) + route = None + for idx, feat in enumerate(feats): + if idx > 0: + feat = fluid.layers.concat(input=[route, feat], axis=1) + route, tip = self.yolo_blocks[idx](feat) + block_out = self.block_outputs[idx](tip) + outputs.append(block_out) + + if idx < 2: + route = self.route_blocks[idx](route) + route = fluid.layers.resize_nearest(route, scale=2) + + if self.model_mode != 'train': + anchor_mask = self.anchor_masks[idx] + mask_anchors = [] + for m in anchor_mask: + mask_anchors.append(self.anchors[2 * m]) + mask_anchors.append(self.anchors[2 * m + 1]) + b, s = fluid.layers.yolo_box( + x=block_out, + img_size=img_shape, + anchors=mask_anchors, + class_num=self.num_classes, + conf_thresh=self.valid_thresh, + downsample_ratio=downsample) + + boxes.append(b) + scores.append(fluid.layers.transpose(s, perm=[0, 2, 1])) + + downsample //= 2 + + if self.model_mode == 'train': + return outputs + + preds = [img_id, + fluid.layers.multiclass_nms( + bboxes=fluid.layers.concat(boxes, axis=1), + scores=fluid.layers.concat(scores, axis=2), + score_threshold=self.valid_thresh, + nms_top_k=self.nms_topk, + keep_top_k=self.nms_posk, + nms_threshold=self.nms_thresh, + background_label=-1)] + + if self.model_mode == 'test': + return preds + + # model_mode == "eval" + return outputs + preds + +class YoloLoss(Loss): + def __init__(self, num_classes=80, num_max_boxes=50): + super(YoloLoss, self).__init__() + self.num_classes = num_classes + self.num_max_boxes = num_max_boxes + self.ignore_thresh = 0.7 + self.anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, + 59, 119, 116, 90, 156, 198, 373, 326] + self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + + def forward(self, outputs, labels): + downsample = 32 + gt_box, gt_label, gt_score = labels + losses = [] + + for idx, out in 
enumerate(outputs): + if idx == 3: break # debug + anchor_mask = self.anchor_masks[idx] + loss = fluid.layers.yolov3_loss( + x=out, + gt_box=gt_box, + gt_label=gt_label, + gt_score=gt_score, + anchor_mask=anchor_mask, + downsample_ratio=downsample, + anchors=self.anchors, + class_num=self.num_classes, + ignore_thresh=self.ignore_thresh, + use_label_smooth=True) + loss = fluid.layers.reduce_mean(loss) + losses.append(loss) + downsample //= 2 + return losses + + +def _yolov3_darknet(num_layers=53, num_classes=80, + model_mode='train', pretrained=True): + model = YOLOv3(num_classes, model_mode) + if pretrained: + assert num_layers in pretrain_infos.keys(), \ + "YOLOv3-DarkNet{} do not have pretrained weights now, " \ + "pretrained should be set as False".format(num_layers) + weight_path = get_weights_path(*(pretrain_infos[num_layers])) + assert weight_path.endswith('.pdparams'), \ + "suffix of weight must be .pdparams" + model.load(weight_path[:-9]) + return model + + +def yolov3_darknet53(num_classes=80, model_mode='train', pretrained=True): + """YOLOv3 model with 53-layer DarkNet as backbone + + Args: + num_classes (int): class number, default 80. + model_mode (str): 'train', 'eval', 'test' mode, network structure + will be diffrent in the output layer and data, in 'train' mode, + no output layer append, in 'eval' and 'test', output feature + map will be decode to predictions by 'fluid.layers.yolo_box', + in 'eval' mode, return feature maps and predictions, in 'test' + mode, only return predictions. Default 'train'. + pretrained (bool): If True, returns a model with pre-trained model + on COCO, default True + """ + return _yolov3_darknet(53, num_classes, model_mode, pretrained) diff --git a/vision/transforms/__init__.py b/hapi/vision/transforms/__init__.py similarity index 100% rename from vision/transforms/__init__.py rename to hapi/vision/transforms/__init__.py diff --git a/vision/transforms/detection_transforms.py b/hapi/vision/transforms/detection_transforms.py similarity index 100% rename from vision/transforms/detection_transforms.py rename to hapi/vision/transforms/detection_transforms.py diff --git a/vision/transforms/functional.py b/hapi/vision/transforms/functional.py similarity index 100% rename from vision/transforms/functional.py rename to hapi/vision/transforms/functional.py diff --git a/vision/transforms/transforms.py b/hapi/vision/transforms/transforms.py similarity index 92% rename from vision/transforms/transforms.py rename to hapi/vision/transforms/transforms.py index 3b935ee2ac1bf208f07bcfe99eea130fe07e0966..e71a74f17be5e30487c1c0ce88ada067748ae57a 100644 --- a/vision/transforms/transforms.py +++ b/hapi/vision/transforms/transforms.py @@ -129,7 +129,7 @@ class Resize(object): self.size = size self.interpolation = interpolation - def __call__(self, img): + def __call__(self, img, lbl): """ Args: img (PIL Image): Image to be scaled. @@ -137,7 +137,7 @@ class Resize(object): Returns: PIL Image: Rescaled image. 
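An inference sketch in 'test' mode (illustrative; it assumes `yolov3_darknet53` is exported from `hapi.vision.models`, and follows the `img_id`/`img_shape` tensor conventions used by `forward` above):

    import numpy as np
    from paddle import fluid
    from hapi.vision.models import yolov3_darknet53

    with fluid.dygraph.guard():
        model = yolov3_darknet53(num_classes=80, model_mode='test',
                                 pretrained=False)
        img = fluid.dygraph.to_variable(
            np.random.randn(1, 3, 608, 608).astype('float32'))
        img_id = fluid.dygraph.to_variable(np.array([[0]]).astype('int64'))
        # original (height, width) of each image; yolo_box uses it to
        # rescale the decoded boxes back to image coordinates
        img_shape = fluid.dygraph.to_variable(
            np.array([[608, 608]]).astype('int32'))
        img_id_out, bboxes = model(img_id, img_shape, img)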
""" - return F.resize(img, self.size, self.interpolation) + return F.resize(img, self.size, self.interpolation), lbl class RandomResizedCrop(object): @@ -199,10 +199,10 @@ class RandomResizedCrop(object): y = (height - h) // 2 return x, y, w, h - def __call__(self, img): + def __call__(self, img, lbl): x, y, w, h = self._get_params(img) cropped_img = img[y:y + h, x:x + w] - return F.resize(cropped_img, self.output_size, self.interpolation) + return F.resize(cropped_img, self.output_size, self.interpolation), lbl class CenterCropResize(object): @@ -230,10 +230,10 @@ class CenterCropResize(object): y = (w + 1 - c) // 2 return c, x, y - def __call__(self, img): + def __call__(self, img, lbl): c, x, y = self._get_params(img) cropped_img = img[x:x + c, y:y + c, :] - return F.resize(cropped_img, self.size, self.interpolation) + return F.resize(cropped_img, self.size, self.interpolation), lbl class CenterCrop(object): @@ -257,10 +257,10 @@ class CenterCrop(object): y = int(round((h - th) / 2.0)) return x, y - def __call__(self, img): + def __call__(self, img, lbl): x, y = self._get_params(img) th, tw = self.output_size - return img[y:y + th, x:x + tw] + return img[y:y + th, x:x + tw], lbl class RandomHorizontalFlip(object): @@ -273,10 +273,10 @@ class RandomHorizontalFlip(object): def __init__(self, prob=0.5): self.prob = prob - def __call__(self, img): + def __call__(self, img, lbl): if np.random.random() < self.prob: - return F.flip(img, code=1) - return img + return F.flip(img, code=1), lbl + return img, lbl class RandomVerticalFlip(object): @@ -289,10 +289,10 @@ class RandomVerticalFlip(object): def __init__(self, prob=0.5): self.prob = prob - def __call__(self, img): + def __call__(self, img, lbl): if np.random.random() < self.prob: - return F.flip(img, code=0) - return img + return F.flip(img, code=0), lbl + return img, lbl class Normalize(object): @@ -317,8 +317,8 @@ class Normalize(object): self.mean = np.array(mean, dtype=np.float32).reshape(len(mean), 1, 1) self.std = np.array(std, dtype=np.float32).reshape(len(std), 1, 1) - def __call__(self, img): - return (img - self.mean) / self.std + def __call__(self, img, lbl): + return (img - self.mean) / self.std, lbl class Permute(object): @@ -337,10 +337,10 @@ class Permute(object): ], "Only support 'CHW' mode, but received mode: {}".format(mode) self.mode = mode - def __call__(self, img): + def __call__(self, img, lbl): if self.mode == "CHW": - return img.transpose((2, 0, 1))[::-1, ...] 
- return img + return img.transpose((2, 0, 1))[::-1, ...], lbl + return img, lbl class GaussianNoise(object): @@ -356,11 +356,11 @@ class GaussianNoise(object): self.mean = np.array(mean, dtype=np.float32) self.std = np.array(std, dtype=np.float32) - def __call__(self, img): + def __call__(self, img, lbl): dtype = img.dtype noise = np.random.normal(self.mean, self.std, img.shape) * 255 img = img + noise.astype(np.float32) - return np.clip(img, 0, 255).astype(dtype) + return np.clip(img, 0, 255).astype(dtype), lbl class BrightnessTransform(object): @@ -376,15 +376,15 @@ class BrightnessTransform(object): raise ValueError("brightness value should be non-negative") self.value = value - def __call__(self, img): + def __call__(self, img, lbl): if self.value == 0: - return img + return img, lbl dtype = img.dtype img = img.astype(np.float32) alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value) img = img * alpha - return img.clip(0, 255).astype(dtype) + return img.clip(0, 255).astype(dtype), lbl class ContrastTransform(object): @@ -400,16 +400,16 @@ class ContrastTransform(object): raise ValueError("contrast value should be non-negative") self.value = value - def __call__(self, img): + def __call__(self, img, lbl): if self.value == 0: - return img + return img, lbl dtype = img.dtype img = img.astype(np.float32) alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value) img = img * alpha + cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).mean() * ( 1 - alpha) - return img.clip(0, 255).astype(dtype) + return img.clip(0, 255).astype(dtype), lbl class SaturationTransform(object): @@ -425,9 +425,9 @@ class SaturationTransform(object): raise ValueError("saturation value should be non-negative") self.value = value - def __call__(self, img): + def __call__(self, img, lbl): if self.value == 0: - return img + return img, lbl dtype = img.dtype img = img.astype(np.float32) @@ -435,7 +435,7 @@ class SaturationTransform(object): gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) gray_img = gray_img[..., np.newaxis] img = img * alpha + gray_img * (1 - alpha) - return img.clip(0, 255).astype(dtype) + return img.clip(0, 255).astype(dtype), lbl class HueTransform(object): @@ -451,9 +451,9 @@ class HueTransform(object): raise ValueError("hue value should be in [0.0, 0.5]") self.value = value - def __call__(self, img): + def __call__(self, img, lbl): if self.value == 0: - return img + return img, lbl dtype = img.dtype img = img.astype(np.uint8) @@ -466,7 +466,7 @@ class HueTransform(object): with np.errstate(over="ignore"): h += np.uint8(alpha * 255) hsv_img = cv2.merge([h, s, v]) - return cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR_FULL).astype(dtype) + return cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR_FULL).astype(dtype), lbl class ColorJitter(object): @@ -501,5 +501,5 @@ class ColorJitter(object): random.shuffle(transforms) self.transforms = Compose(transforms) - def __call__(self, img): - return self.transforms(img) + def __call__(self, img, lbl): + return self.transforms(img), lbl
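These changes make every transform take and return an (img, lbl) pair so labels ride through the pipeline alongside images. A hedged sketch of the resulting calling convention (it assumes `Compose` forwards both arguments to each transform in turn, which the ColorJitter hunk above relies on; the mean/std values are illustrative):

    import numpy as np
    from hapi.vision.transforms import transforms

    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(prob=0.5),
        transforms.Normalize(mean=[123.675, 116.28, 103.53],
                             std=[58.395, 57.12, 57.375]),
        transforms.Permute(mode='CHW'),
    ])

    img = np.random.randint(0, 256, (224, 224, 3)).astype('float32')
    lbl = np.array([7])
    img, lbl = train_transform(img, lbl)  # label passes through unchanged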