提交 3f4149cd 编写于 作者: D dengkaipeng

vision Models to vision.models and example

上级 1c9f502c
...@@ -20,7 +20,7 @@ import json ...@@ -20,7 +20,7 @@ import json
sys.path.append('../') sys.path.append('../')
from metrics import Metric from hapi.metrics import Metric
from bmn_utils import boundary_choose, bmn_post_processing from bmn_utils import boundary_choose, bmn_post_processing
......
...@@ -162,56 +162,3 @@ def bmn_post_processing(video_dict, subset, output_path, result_path): ...@@ -162,56 +162,3 @@ def bmn_post_processing(video_dict, subset, output_path, result_path):
outfile.close() outfile.close()
def _get_interp1d_bin_mask(seg_xmin, seg_xmax, tscale, num_sample,
num_sample_perbin):
""" generate sample mask for a boundary-matching pair """
plen = float(seg_xmax - seg_xmin)
plen_sample = plen / (num_sample * num_sample_perbin - 1.0)
total_samples = [
seg_xmin + plen_sample * ii
for ii in range(num_sample * num_sample_perbin)
]
p_mask = []
for idx in range(num_sample):
bin_samples = total_samples[idx * num_sample_perbin:(idx + 1) *
num_sample_perbin]
bin_vector = np.zeros([tscale])
for sample in bin_samples:
sample_upper = math.ceil(sample)
sample_decimal, sample_down = math.modf(sample)
if int(sample_down) <= (tscale - 1) and int(sample_down) >= 0:
bin_vector[int(sample_down)] += 1 - sample_decimal
if int(sample_upper) <= (tscale - 1) and int(sample_upper) >= 0:
bin_vector[int(sample_upper)] += sample_decimal
bin_vector = 1.0 / num_sample_perbin * bin_vector
p_mask.append(bin_vector)
p_mask = np.stack(p_mask, axis=1)
return p_mask
def get_interp1d_mask(tscale, dscale, prop_boundary_ratio, num_sample,
                      num_sample_perbin):
    """Build the sample mask for every point of the Boundary-Matching Map.

    For each (start, duration) pair whose end stays below ``tscale`` the
    proposal is widened on both sides by ``prop_boundary_ratio`` times its
    length and passed to ``_get_interp1d_bin_mask``; pairs that run past
    ``tscale`` get an all-zero mask.

    Returns:
        numpy.ndarray: float32 array reshaped to ``[tscale, -1]``.
    """
    per_start = []
    for begin in range(tscale):
        per_duration = []
        for span in range(dscale):
            end = begin + span
            if end >= tscale:
                # proposal exceeds the sequence: contribute an empty mask
                per_duration.append(np.zeros([tscale, num_sample]))
                continue
            margin = (float(end - begin) + 1) * prop_boundary_ratio
            per_duration.append(
                _get_interp1d_bin_mask(begin - margin, end + margin, tscale,
                                       num_sample, num_sample_perbin))
        per_start.append(np.stack(per_duration, axis=2))
    stacked = np.stack(per_start, axis=3).astype(np.float32)
    return np.reshape(stacked, [tscale, -1])
...@@ -18,11 +18,9 @@ import sys ...@@ -18,11 +18,9 @@ import sys
import logging import logging
import paddle.fluid as fluid import paddle.fluid as fluid
sys.path.append('../') from hapi.model import set_device, Input
from hapi.vision.models import BMN, BmnLoss
from model import set_device, Input
from bmn_metric import BmnMetric from bmn_metric import BmnMetric
from bmn_model import BMN, BmnLoss
from reader import BmnDataset from reader import BmnDataset
from config_utils import * from config_utils import *
......
...@@ -18,11 +18,9 @@ import os ...@@ -18,11 +18,9 @@ import os
import logging import logging
import paddle.fluid as fluid import paddle.fluid as fluid
sys.path.append('../') from hapi.model import set_device, Input
from hapi.vision.models import BMN, BmnLoss
from model import set_device, Input
from bmn_metric import BmnMetric from bmn_metric import BmnMetric
from bmn_model import BMN, BmnLoss
from reader import BmnDataset from reader import BmnDataset
from config_utils import * from config_utils import *
......
...@@ -18,10 +18,8 @@ import logging ...@@ -18,10 +18,8 @@ import logging
import sys import sys
import os import os
sys.path.append('../') from hapi.model import set_device, Input
from hapi.vision.models import BMN, BmnLoss
from model import set_device, Input
from bmn_model import BMN, BmnLoss
from reader import BmnDataset from reader import BmnDataset
from config_utils import * from config_utils import *
......
...@@ -18,8 +18,8 @@ import math ...@@ -18,8 +18,8 @@ import math
import random import random
import numpy as np import numpy as np
from datasets.folder import DatasetFolder from hapi.datasets import DatasetFolder
from transform import transforms from hapi.vision.transforms import transforms
from paddle import fluid from paddle import fluid
...@@ -45,7 +45,8 @@ class ImageNetDataset(DatasetFolder): ...@@ -45,7 +45,8 @@ class ImageNetDataset(DatasetFolder):
def __getitem__(self, idx): def __getitem__(self, idx):
img_path, label = self.samples[idx] img_path, label = self.samples[idx]
img = cv2.imread(img_path).astype(np.float32) img = cv2.imread(img_path).astype(np.float32)
return self.transform(img), [label] label = np.array([label])
return self.transform(img, label)
def __len__(self): def __len__(self):
return len(self.samples) return len(self.samples)
...@@ -24,16 +24,18 @@ sys.path.append('../') ...@@ -24,16 +24,18 @@ sys.path.append('../')
import time import time
import math import math
import numpy as np import numpy as np
import models
import paddle.fluid as fluid
from model import CrossEntropy, Input, set_device import paddle.fluid as fluid
from imagenet_dataset import ImageNetDataset
from distributed import DistributedBatchSampler
from paddle.fluid.dygraph.parallel import ParallelEnv from paddle.fluid.dygraph.parallel import ParallelEnv
from metrics import Accuracy
from paddle.io import BatchSampler, DataLoader from paddle.io import BatchSampler, DataLoader
from hapi.model import CrossEntropy, Input, set_device
from hapi.distributed import DistributedBatchSampler
from hapi.metrics import Accuracy
import hapi.vision.models as models
from imagenet_dataset import ImageNetDataset
def make_optimizer(step_per_epoch, parameter_list=None): def make_optimizer(step_per_epoch, parameter_list=None):
base_lr = FLAGS.lr base_lr = FLAGS.lr
......
...@@ -19,8 +19,8 @@ import os ...@@ -19,8 +19,8 @@ import os
import argparse import argparse
import numpy as np import numpy as np
from model import Input, set_device from hapi.model import Input, set_device
from models import tsm_resnet50 from hapi.vision.models import tsm_resnet50
from check import check_gpu, check_version from check import check_gpu, check_version
from kinetics_dataset import KineticsDataset from kinetics_dataset import KineticsDataset
......
...@@ -22,9 +22,9 @@ import numpy as np ...@@ -22,9 +22,9 @@ import numpy as np
from paddle import fluid from paddle import fluid
from paddle.fluid.dygraph.parallel import ParallelEnv from paddle.fluid.dygraph.parallel import ParallelEnv
from model import Model, CrossEntropy, Input, set_device from hapi.model import Model, CrossEntropy, Input, set_device
from metrics import Accuracy from hapi.metrics import Accuracy
from models import tsm_resnet50 from hapi.vision.models import tsm_resnet50
from check import check_gpu, check_version from check import check_gpu, check_version
from kinetics_dataset import KineticsDataset from kinetics_dataset import KineticsDataset
......
...@@ -24,11 +24,11 @@ from paddle import fluid ...@@ -24,11 +24,11 @@ from paddle import fluid
from paddle.fluid.optimizer import Momentum from paddle.fluid.optimizer import Momentum
from paddle.io import DataLoader from paddle.io import DataLoader
from model import Model, Input, set_device from hapi.model import Model, Input, set_device
from models import yolov3_darknet53, YoloLoss from hapi.vision.models import yolov3_darknet53, YoloLoss
from hapi.vision.transforms import *
from coco import COCODataset from coco import COCODataset
from transforms import *
from visualizer import draw_bbox from visualizer import draw_bbox
import logging import logging
...@@ -65,7 +65,8 @@ def main(): ...@@ -65,7 +65,8 @@ def main():
device = set_device(FLAGS.device) device = set_device(FLAGS.device)
fluid.enable_dygraph(device) if FLAGS.dynamic else None fluid.enable_dygraph(device) if FLAGS.dynamic else None
inputs = [Input([None, 3], 'int32', name='img_info'), inputs = [Input([None, 1], 'int64', name='img_id'),
Input([None, 2], 'int32', name='img_shape'),
Input([None, 3, None, None], 'float32', name='image')] Input([None, 3, None, None], 'float32', name='image')]
cat2name = load_labels(FLAGS.label_list, with_background=False) cat2name = load_labels(FLAGS.label_list, with_background=False)
...@@ -87,9 +88,10 @@ def main(): ...@@ -87,9 +88,10 @@ def main():
img -= np.array(IMAGE_MEAN) img -= np.array(IMAGE_MEAN)
img /= np.array(IMAGE_STD) img /= np.array(IMAGE_STD)
img = img.transpose((2, 0, 1))[np.newaxis, :] img = img.transpose((2, 0, 1))[np.newaxis, :]
img_info = np.array([0, h, w]).astype('int32')[np.newaxis, :] img_id = np.array([0]).astype('int64')[np.newaxis, :]
img_shape = np.array([h, w]).astype('int32')[np.newaxis, :]
_, bboxes = model.test([img_info, img]) _, bboxes = model.test([img_id, img_shape, img])
vis_img = draw_bbox(orig_img, cat2name, bboxes, FLAGS.draw_threshold) vis_img = draw_bbox(orig_img, cat2name, bboxes, FLAGS.draw_threshold)
save_name = get_save_image_name(FLAGS.output_dir, FLAGS.infer_image) save_name = get_save_image_name(FLAGS.output_dir, FLAGS.infer_image)
......
...@@ -25,13 +25,13 @@ from paddle import fluid ...@@ -25,13 +25,13 @@ from paddle import fluid
from paddle.fluid.optimizer import Momentum from paddle.fluid.optimizer import Momentum
from paddle.io import DataLoader from paddle.io import DataLoader
from model import Model, Input, set_device from hapi.model import Model, Input, set_device
from distributed import DistributedBatchSampler from hapi.distributed import DistributedBatchSampler
from models import yolov3_darknet53, YoloLoss from hapi.datasets import COCODataset
from hapi.vision.transforms import *
from hapi.vision.models import yolov3_darknet53, YoloLoss
from coco_metric import COCOMetric from coco_metric import COCOMetric
from vision.datasets import COCODataset
from vision.transforms import *
NUM_MAX_BOXES = 50 NUM_MAX_BOXES = 50
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
import six import six
import copy import copy
from hapi.progressbar import ProgressBar from progressbar import ProgressBar
from paddle.fluid.dygraph.parallel import ParallelEnv from paddle.fluid.dygraph.parallel import ParallelEnv
......
...@@ -75,7 +75,6 @@ class Flowers(Dataset): ...@@ -75,7 +75,6 @@ class Flowers(Dataset):
setid_file=None, setid_file=None,
mode='train', mode='train',
transform=None, transform=None,
target_transform=None,
download=True): download=True):
assert mode.lower() in ['train', 'valid', 'test'], \ assert mode.lower() in ['train', 'valid', 'test'], \
"mode should be 'train', 'valid' or 'test', but got {}".format(mode) "mode should be 'train', 'valid' or 'test', but got {}".format(mode)
...@@ -100,7 +99,6 @@ class Flowers(Dataset): ...@@ -100,7 +99,6 @@ class Flowers(Dataset):
setid_file, SETID_URL, SETID_MD5, 'flowers', download) setid_file, SETID_URL, SETID_MD5, 'flowers', download)
self.transform = transform self.transform = transform
self.target_transform = target_transform
# read dataset into memory # read dataset into memory
self._load_anno() self._load_anno()
...@@ -123,9 +121,7 @@ class Flowers(Dataset): ...@@ -123,9 +121,7 @@ class Flowers(Dataset):
image = np.array(Image.open(io.BytesIO(image))) image = np.array(Image.open(io.BytesIO(image)))
if self.transform is not None: if self.transform is not None:
image = self.transform(image) image, label = self.transform(image, label)
if self.target_transform is not None:
label = self.target_transform(label)
return image, label return image, label
......
...@@ -78,8 +78,6 @@ class DatasetFolder(Dataset): ...@@ -78,8 +78,6 @@ class DatasetFolder(Dataset):
both extensions and is_valid_file should not be passed. both extensions and is_valid_file should not be passed.
transform (callable|optional): A function/transform that takes in transform (callable|optional): A function/transform that takes in
a sample and returns a transformed version. a sample and returns a transformed version.
target_transform (callable|optional): A function/transform that takes
in the target and transforms it.
is_valid_file (callable|optional): A function that takes path of a file is_valid_file (callable|optional): A function that takes path of a file
and check if the file is a valid file (used to check of corrupt files) and check if the file is a valid file (used to check of corrupt files)
both extensions and is_valid_file should not be passed. both extensions and is_valid_file should not be passed.
...@@ -96,11 +94,9 @@ class DatasetFolder(Dataset): ...@@ -96,11 +94,9 @@ class DatasetFolder(Dataset):
loader=None, loader=None,
extensions=None, extensions=None,
transform=None, transform=None,
target_transform=None,
is_valid_file=None): is_valid_file=None):
self.root = root self.root = root
self.transform = transform self.transform = transform
self.target_transform = target_transform
if extensions is None: if extensions is None:
extensions = IMG_EXTENSIONS extensions = IMG_EXTENSIONS
classes, class_to_idx = self._find_classes(self.root) classes, class_to_idx = self._find_classes(self.root)
...@@ -154,9 +150,7 @@ class DatasetFolder(Dataset): ...@@ -154,9 +150,7 @@ class DatasetFolder(Dataset):
path, target = self.samples[index] path, target = self.samples[index]
sample = self.loader(path) sample = self.loader(path)
if self.transform is not None: if self.transform is not None:
sample = self.transform(sample) sample, target = self.transform(sample, target)
if self.target_transform is not None:
target = self.target_transform(target)
return sample, target return sample, target
......
...@@ -72,7 +72,6 @@ class MNIST(Dataset): ...@@ -72,7 +72,6 @@ class MNIST(Dataset):
label_path=None, label_path=None,
mode='train', mode='train',
transform=None, transform=None,
target_transform=None,
download=True): download=True):
assert mode.lower() in ['train', 'test'], \ assert mode.lower() in ['train', 'test'], \
"mode should be 'train' or 'test', but got {}".format(mode) "mode should be 'train' or 'test', but got {}".format(mode)
...@@ -95,7 +94,6 @@ class MNIST(Dataset): ...@@ -95,7 +94,6 @@ class MNIST(Dataset):
label_path, label_url, label_md5, 'mnist', download) label_path, label_url, label_md5, 'mnist', download)
self.transform = transform self.transform = transform
self.target_transform = target_transform
# read dataset into memory # read dataset into memory
self._parse_dataset() self._parse_dataset()
...@@ -151,9 +149,7 @@ class MNIST(Dataset): ...@@ -151,9 +149,7 @@ class MNIST(Dataset):
def __getitem__(self, idx): def __getitem__(self, idx):
image, label = self.images[idx], self.labels[idx] image, label = self.images[idx], self.labels[idx]
if self.transform is not None: if self.transform is not None:
image = self.transform(image) image, label = self.transform(image, label)
if self.target_transform is not None:
label = self.target_transform(label)
return image, label return image, label
def __len__(self): def __len__(self):
......
...@@ -23,7 +23,7 @@ import numpy as np ...@@ -23,7 +23,7 @@ import numpy as np
from paddle import fluid from paddle import fluid
from paddle.fluid.layers import collective from paddle.fluid.layers import collective
from paddle.fluid.dygraph.parallel import ParallelEnv, ParallelStrategy from paddle.fluid.dygraph.parallel import ParallelEnv, ParallelStrategy
from paddle.fluid.io import BatchSampler from paddle.io import BatchSampler
_parallel_context_initialized = False _parallel_context_initialized = False
...@@ -39,7 +39,7 @@ class DistributedBatchSampler(BatchSampler): ...@@ -39,7 +39,7 @@ class DistributedBatchSampler(BatchSampler):
Dataset is assumed to be of constant size. Dataset is assumed to be of constant size.
Args: Args:
data_source: this could be a `fluid.io.Dataset` implement data_source: this could be a `paddle.io.Dataset` implement
or other python object which implemented or other python object which implemented
`__len__` for BatchSampler to get sample `__len__` for BatchSampler to get sample
number of data source. number of data source.
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import os.path as osp
import shutil
import requests
import tqdm
import hashlib
import time
from paddle.fluid.dygraph.parallel import ParallelEnv
import logging
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Only get_weights_path is exported by a star-import of this module.
__all__ = ['get_weights_path']
# Directory under the user's home where weights files are stored.
WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/hapi/weights")
# Number of download attempts _download makes before raising RuntimeError.
DOWNLOAD_RETRY_LIMIT = 3
def get_weights_path(url, md5sum=None):
    """Return the local path of the weights file referenced by ``url``.

    The file is looked up under WEIGHTS_HOME; if it is not there yet it is
    downloaded from ``url`` (see ``get_path``).

    Args:
        url (str): download url of the weights file.
        md5sum (str|None): expected md5 of the file, None to skip the check.

    Returns:
        str: path of the weights file under WEIGHTS_HOME.
    """
    weights_path = get_path(url, WEIGHTS_HOME, md5sum)[0]
    return weights_path
def map_path(url, root_dir):
    """Map a download url to the local path it is stored at under root_dir.

    Only the last path component of ``url`` is kept as the file name.
    """
    return osp.join(root_dir, osp.basename(url))
def get_path(url, root_dir, md5sum=None, check_exist=True):
    """Return the local path for ``url`` under ``root_dir``, downloading if needed.

    If the mapped file already exists, ``check_exist`` is set and the md5
    check passes, the path is returned directly. Otherwise the process with
    local rank 0 downloads the file, while every other rank polls once per
    second until the file shows up.

    Args:
        url (str): download url
        root_dir (str): root dir for downloading, it should be
            WEIGHTS_HOME or DATASET_HOME
        md5sum (str): md5 sum of download package
        check_exist (bool): whether an already present file may be reused

    Returns:
        tuple: ``(fullpath, exist_flag)`` where ``exist_flag`` is True only
        when an existing file was reused.
    """
    target = map_path(url, root_dir)

    reusable = (osp.exists(target) and check_exist and
                _md5check(target, md5sum))
    if reusable:
        if ParallelEnv().local_rank == 0:
            logger.info("Found {}".format(target))
        return target, True

    if ParallelEnv().local_rank == 0:
        return _download(url, root_dir, md5sum), False

    # Other ranks wait for rank 0 to finish placing the file.
    while not os.path.exists(target):
        time.sleep(1)
    return target, False
def _download(url, path, md5sum=None):
    """
    Download from url, save to path.

    The download is repeated until the target file exists and passes the
    md5 check, for at most DOWNLOAD_RETRY_LIMIT attempts.

    Args:
        url (str): download url
        path (str): directory the file is downloaded into
        md5sum (str|None): expected md5 of the file, None to skip the check

    Returns:
        str: full path of the downloaded file.

    Raises:
        RuntimeError: if the server answers with a status code other than
            200, or the retry limit is reached.
    """
    if not osp.exists(path):
        os.makedirs(path)

    fname = osp.split(url)[-1]
    fullname = osp.join(path, fname)

    retry_cnt = 0
    while not (osp.exists(fullname) and _md5check(fullname, md5sum)):
        if retry_cnt < DOWNLOAD_RETRY_LIMIT:
            retry_cnt += 1
        else:
            raise RuntimeError("Download from {} failed. "
                               "Retry limit reached".format(url))

        if ParallelEnv().local_rank == 0:
            logger.info("Downloading {} from {}".format(fname, url))

        req = requests.get(url, stream=True)
        # A streamed response keeps its connection open until the body is
        # fully consumed or the response is closed, so always close it,
        # including on the error paths below.
        try:
            if req.status_code != 200:
                raise RuntimeError("Downloading from {} failed with code "
                                   "{}!".format(url, req.status_code))

            # To guard against an interrupted download, write to
            # tmp_fullname first and move it to fullname only after the
            # download has finished.
            tmp_fullname = fullname + "_tmp"
            total_size = req.headers.get('content-length')
            with open(tmp_fullname, 'wb') as f:
                if total_size:
                    # Size is known: show a progress bar in KB chunks.
                    for chunk in tqdm.tqdm(
                            req.iter_content(chunk_size=1024),
                            total=(int(total_size) + 1023) // 1024,
                            unit='KB'):
                        f.write(chunk)
                else:
                    for chunk in req.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)
        finally:
            req.close()
        shutil.move(tmp_fullname, fullname)

    return fullname
def _md5check(fullname, md5sum=None):
if md5sum is None:
return True
if ParallelEnv().local_rank == 0:
logger.info("File {} md5 checking...".format(fullname))
md5 = hashlib.md5()
with open(fullname, 'rb') as f:
for chunk in iter(lambda: f.read(4096), b""):
md5.update(chunk)
calc_md5sum = md5.hexdigest()
if calc_md5sum != md5sum:
if ParallelEnv().local_rank == 0:
logger.info("File {} md5 check failed, {}(calc) != "
"{}(base)".format(fullname, calc_md5sum, md5sum))
return False
return True
...@@ -32,7 +32,7 @@ from paddle.fluid.dygraph.parallel import ParallelEnv ...@@ -32,7 +32,7 @@ from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.fluid.layers.utils import flatten from paddle.fluid.layers.utils import flatten
from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy
from paddle.fluid.incubate.fleet.base import role_maker from paddle.fluid.incubate.fleet.base import role_maker
from paddle.fluid.io import DataLoader, Dataset from paddle.io import DataLoader, Dataset
from hapi.distributed import DistributedBatchSampler, _all_gather, prepare_distributed_context, _parallel_context_initialized from hapi.distributed import DistributedBatchSampler, _all_gather, prepare_distributed_context, _parallel_context_initialized
from hapi.metrics import Metric from hapi.metrics import Metric
...@@ -45,6 +45,14 @@ __all__ = [ ...@@ -45,6 +45,14 @@ __all__ = [
def set_device(device): def set_device(device):
"""
Args:
device (str): specify device type, 'cpu' or 'gpu'.
Returns:
fluid.CUDAPlace or fluid.CPUPlace: Created GPU or CPU place.
"""
assert isinstance(device, six.string_types) and device.lower() in ['cpu', 'gpu'], \ assert isinstance(device, six.string_types) and device.lower() in ['cpu', 'gpu'], \
"Expected device in ['cpu', 'gpu'], but got {}".format(device) "Expected device in ['cpu', 'gpu'], but got {}".format(device)
...@@ -117,9 +125,9 @@ class Loss(object): ...@@ -117,9 +125,9 @@ class Loss(object):
def forward(self, outputs, labels): def forward(self, outputs, labels):
raise NotImplementedError() raise NotImplementedError()
def __call__(self, outputs, labels): def __call__(self, outputs, labels=None):
labels = to_list(labels) labels = to_list(labels)
if in_dygraph_mode(): if in_dygraph_mode() and labels:
labels = [to_variable(l) for l in labels] labels = [to_variable(l) for l in labels]
losses = to_list(self.forward(to_list(outputs), labels)) losses = to_list(self.forward(to_list(outputs), labels))
if self.average: if self.average:
...@@ -366,10 +374,27 @@ class StaticGraphAdapter(object): ...@@ -366,10 +374,27 @@ class StaticGraphAdapter(object):
metric_list, metric_splits = flatten_list(endpoints['metric']) metric_list, metric_splits = flatten_list(endpoints['metric'])
fetch_list = endpoints['loss'] + metric_list fetch_list = endpoints['loss'] + metric_list
num_loss = len(endpoints['loss']) num_loss = len(endpoints['loss'])
# if fetch Variable is same as input Variable, do not fetch
# from program, get it from input directly
pruned_fetch_list = []
pruned_fetch_idx_name_map = [""] * len(fetch_list)
for i, fetch_var in enumerate(fetch_list):
if fetch_var.name in feed.keys():
pruned_fetch_idx_name_map[i] = fetch_var.name
else:
pruned_fetch_list.append(fetch_var)
rets = self._executor.run(compiled_prog, rets = self._executor.run(compiled_prog,
feed=feed, feed=feed,
fetch_list=fetch_list, fetch_list=pruned_fetch_list,
return_numpy=False) return_numpy=False)
# restore pruned fetch_list Variable from feeds
for i, name in enumerate(pruned_fetch_idx_name_map):
if len(name) > 0:
rets.insert(i, feed[name])
# LoDTensor cannot be fetch as numpy directly # LoDTensor cannot be fetch as numpy directly
rets = [np.array(v) for v in rets] rets = [np.array(v) for v in rets]
if self.mode == 'test': if self.mode == 'test':
...@@ -867,8 +892,6 @@ class Model(fluid.dygraph.Layer): ...@@ -867,8 +892,6 @@ class Model(fluid.dygraph.Layer):
if not isinstance(inputs, (list, dict, Input)): if not isinstance(inputs, (list, dict, Input)):
raise TypeError( raise TypeError(
"'inputs' must be list or dict in static graph mode") "'inputs' must be list or dict in static graph mode")
if loss_function and not isinstance(labels, (list, Input)):
raise TypeError("'labels' must be list in static graph mode")
metrics = metrics or [] metrics = metrics or []
for metric in to_list(metrics): for metric in to_list(metrics):
...@@ -904,11 +927,11 @@ class Model(fluid.dygraph.Layer): ...@@ -904,11 +927,11 @@ class Model(fluid.dygraph.Layer):
FIXME: add more comments and usage FIXME: add more comments and usage
Args: Args:
train_data (Dataset|DataLoader): An iterable data loader is used for train_data (Dataset|DataLoader): An iterable data loader is used for
train. An instance of paddle.fluid.io.Dataset or train. An instance of paddle paddle.io.Dataset or
paddle.fluid.io.Dataloader is recomended. paddle.io.Dataloader is recomended.
eval_data (Dataset|DataLoader): An iterable data loader is used for eval_data (Dataset|DataLoader): An iterable data loader is used for
evaluation at the end of epoch. If None, will not do evaluation. evaluation at the end of epoch. If None, will not do evaluation.
An instance of paddle.fluid.io.Dataset or paddle.fluid.io.Dataloader An instance of paddle.io.Dataset or paddle.io.Dataloader
is recomended. is recomended.
batch_size (int): Integer number. The batch size of train_data and eval_data. batch_size (int): Integer number. The batch size of train_data and eval_data.
When train_data and eval_data are both the instance of Dataloader, this When train_data and eval_data are both the instance of Dataloader, this
...@@ -1032,8 +1055,8 @@ class Model(fluid.dygraph.Layer): ...@@ -1032,8 +1055,8 @@ class Model(fluid.dygraph.Layer):
FIXME: add more comments and usage FIXME: add more comments and usage
Args: Args:
eval_data (Dataset|DataLoader): An iterable data loader is used for eval_data (Dataset|DataLoader): An iterable data loader is used for
evaluation. An instance of paddle.fluid.io.Dataset or evaluation. An instance of paddle.io.Dataset or
paddle.fluid.io.Dataloader is recomended. paddle.io.Dataloader is recomended.
batch_size (int): Integer number. The batch size of train_data and eval_data. batch_size (int): Integer number. The batch size of train_data and eval_data.
When train_data and eval_data are both the instance of Dataloader, this When train_data and eval_data are both the instance of Dataloader, this
parameter will be ignored. parameter will be ignored.
...@@ -1098,12 +1121,16 @@ class Model(fluid.dygraph.Layer): ...@@ -1098,12 +1121,16 @@ class Model(fluid.dygraph.Layer):
return eval_result return eval_result
def predict(self, test_data, batch_size=1, num_workers=0): def predict(self,
test_data,
batch_size=1,
num_workers=0,
stack_outputs=True):
""" """
FIXME: add more comments and usage FIXME: add more comments and usage
Args: Args:
test_data (Dataset|DataLoader): An iterable data loader is used for test_data (Dataset|DataLoader): An iterable data loader is used for
predict. An instance of paddle.fluid.io.Dataset or paddle.fluid.io.Dataloader predict. An instance of paddle.io.Dataset or paddle.io.Dataloader
is recomended. is recomended.
batch_size (int): Integer number. The batch size of train_data and eval_data. batch_size (int): Integer number. The batch size of train_data and eval_data.
When train_data and eval_data are both the instance of Dataloader, this When train_data and eval_data are both the instance of Dataloader, this
...@@ -1111,6 +1138,12 @@ class Model(fluid.dygraph.Layer): ...@@ -1111,6 +1138,12 @@ class Model(fluid.dygraph.Layer):
num_workers (int): the number of subprocess to load data, 0 for no subprocess num_workers (int): the number of subprocess to load data, 0 for no subprocess
used and loading data in main process. When train_data and eval_data are used and loading data in main process. When train_data and eval_data are
both the instance of Dataloader, this parameter will be ignored. both the instance of Dataloader, this parameter will be ignored.
stack_outputs (bool): whether to stack each output field into a batch. If an output
field of a sample has shape [X, Y] and test_data contains N samples, the predicted
output field will have shape [N, X, Y] if stack_outputs is True, and will
be a length-N list [[X, Y], [X, Y], ..., [X, Y]] if stack_outputs
is False. Setting stack_outputs to False is intended for LoDTensor outputs;
it is recommended to set it to True if the outputs contain no LoDTensor. Default: True.
""" """
if fluid.in_dygraph_mode(): if fluid.in_dygraph_mode():
...@@ -1137,19 +1170,16 @@ class Model(fluid.dygraph.Layer): ...@@ -1137,19 +1170,16 @@ class Model(fluid.dygraph.Layer):
if not isinstance(test_loader, Iterable): if not isinstance(test_loader, Iterable):
loader = test_loader() loader = test_loader()
outputs = None outputs = []
for data in tqdm.tqdm(loader): for data in tqdm.tqdm(loader):
if not fluid.in_dygraph_mode(): data = flatten(data)
data = data[0] outputs.append(self.test(data[:len(self._inputs)]))
outs = self.test(*data)
if outputs is None: # NOTE: for lod tensor output, we should not stack outputs
outputs = outs # for stacking may loss its detail info
else: outputs = list(zip(*outputs))
outputs = [ if stack_outputs:
np.vstack([x, outs[i]]) for i, x in enumerate(outputs) outputs = [np.stack(outs, axis=0) for outs in outputs]
]
self._test_dataloader = None self._test_dataloader = None
if test_loader is not None and self._adapter._nranks > 1 \ if test_loader is not None and self._adapter._nranks > 1 \
...@@ -1161,8 +1191,8 @@ class Model(fluid.dygraph.Layer): ...@@ -1161,8 +1191,8 @@ class Model(fluid.dygraph.Layer):
""" """
Args: Args:
eval_data (Dataset|DataLoader|None): An iterable data loader is used for eval_data (Dataset|DataLoader|None): An iterable data loader is used for
eval. An instance of paddle.fluid.io.Dataset or eval. An instance of paddle.io.Dataset or
paddle.fluid.io.Dataloader is recomended. paddle.io.Dataloader is recomended.
""" """
assert isinstance( assert isinstance(
eval_data, eval_data,
......
...@@ -25,7 +25,7 @@ from functools import partial ...@@ -25,7 +25,7 @@ from functools import partial
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.parallel import ParallelEnv from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.fluid.io import BatchSampler, DataLoader, Dataset from paddle.io import BatchSampler, DataLoader, Dataset
from hapi.distributed import DistributedBatchSampler from hapi.distributed import DistributedBatchSampler
from hapi.text.bert.data_processor import DataProcessor, XnliProcessor, ColaProcessor, MrpcProcessor, MnliProcessor from hapi.text.bert.data_processor import DataProcessor, XnliProcessor, ColaProcessor, MrpcProcessor, MnliProcessor
from hapi.text.bert.batching import prepare_batch_data from hapi.text.bert.batching import prepare_batch_data
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from . import resnet
from . import vgg
from . import mobilenetv1
from . import mobilenetv2
from . import darknet
from . import yolov3
from . import tsm
from . import bmn
from .resnet import *
from .mobilenetv1 import *
from .mobilenetv2 import *
from .vgg import *
from .darknet import *
from .yolov3 import *
from .tsm import *
from .bmn import *
# The package's public names are the concatenation of the __all__ of every
# model submodule imported above.
__all__ = resnet.__all__ \
+ vgg.__all__ \
+ mobilenetv1.__all__ \
+ mobilenetv2.__all__ \
+ darknet.__all__ \
+ yolov3.__all__ \
+ tsm.__all__ \
+ bmn.__all__
...@@ -17,12 +17,68 @@ from paddle.fluid import ParamAttr ...@@ -17,12 +17,68 @@ from paddle.fluid import ParamAttr
import numpy as np import numpy as np
import math import math
from bmn_utils import get_interp1d_mask from hapi.model import Model, Loss
from model import Model, Loss
__all__ = ["BMN", "BmnLoss"]
DATATYPE = 'float32' DATATYPE = 'float32'
def _get_interp1d_bin_mask(seg_xmin, seg_xmax, tscale, num_sample,
num_sample_perbin):
""" generate sample mask for a boundary-matching pair """
plen = float(seg_xmax - seg_xmin)
plen_sample = plen / (num_sample * num_sample_perbin - 1.0)
total_samples = [
seg_xmin + plen_sample * ii
for ii in range(num_sample * num_sample_perbin)
]
p_mask = []
for idx in range(num_sample):
bin_samples = total_samples[idx * num_sample_perbin:(idx + 1) *
num_sample_perbin]
bin_vector = np.zeros([tscale])
for sample in bin_samples:
sample_upper = math.ceil(sample)
sample_decimal, sample_down = math.modf(sample)
if int(sample_down) <= (tscale - 1) and int(sample_down) >= 0:
bin_vector[int(sample_down)] += 1 - sample_decimal
if int(sample_upper) <= (tscale - 1) and int(sample_upper) >= 0:
bin_vector[int(sample_upper)] += sample_decimal
bin_vector = 1.0 / num_sample_perbin * bin_vector
p_mask.append(bin_vector)
p_mask = np.stack(p_mask, axis=1)
return p_mask
def get_interp1d_mask(tscale, dscale, prop_boundary_ratio, num_sample,
                      num_sample_perbin):
    """Build the sample mask for every point of the Boundary-Matching Map.

    For each (start, duration) pair whose end stays inside the temporal axis,
    the proposal is widened on both sides by ``prop_boundary_ratio`` times its
    length and sampled with ``_get_interp1d_bin_mask``. Pairs that run past
    the axis get an all-zero mask.

    Args:
        tscale (int): length of the temporal axis.
        dscale (int): number of candidate durations.
        prop_boundary_ratio (float): relative widening applied on each side.
        num_sample (int): number of bins per proposal.
        num_sample_perbin (int): number of positions averaged per bin.

    Returns:
        numpy.ndarray: float32 array reshaped to [tscale, -1].
    """
    per_start = []
    for start in range(tscale):
        per_duration = []
        for duration in range(dscale):
            end = start + duration
            if end >= tscale:
                # Proposal runs past the temporal axis: nothing to sample.
                mask = np.zeros([tscale, num_sample])
            else:
                length = float(end - start) + 1
                margin = length * prop_boundary_ratio
                mask = _get_interp1d_bin_mask(start - margin, end + margin,
                                              tscale, num_sample,
                                              num_sample_perbin)
            per_duration.append(mask)
        per_start.append(np.stack(per_duration, axis=2))
    stacked = np.stack(per_start, axis=3).astype(np.float32)
    return np.reshape(stacked, [tscale, -1])
# Net # Net
class Conv1D(fluid.dygraph.Layer): class Conv1D(fluid.dygraph.Layer):
def __init__(self, def __init__(self,
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.dygraph.nn import Conv2D, BatchNorm
from hapi.model import Model
from hapi.download import get_weights_path
# Names exported when this module is star-imported.
__all__ = ['DarkNet', 'ConvBNLayer', 'darknet53']

# Pretrained weights, keyed by network depth: {num_layers: (url, md5)}.
# The tuple is unpacked straight into get_weights_path() by _darknet().
pretrain_infos = {
    53: ('https://paddlemodels.bj.bcebos.com/hapi/darknet53.pdparams',
         '2506357a5c31e865785112fc614a487d')
}
class ConvBNLayer(fluid.dygraph.Layer):
    """Bias-free convolution followed by batch norm and optional leaky ReLU.

    Args:
        ch_in (int): number of input channels.
        ch_out (int): number of convolution filters / output channels.
        filter_size (int): convolution kernel size. Default: 3.
        stride (int): convolution stride. Default: 1.
        groups (int): convolution groups. Default: 1.
        padding (int): convolution padding. Default: 0.
        act (str): when equal to "leaky", a leaky ReLU with alpha 0.1 is
            applied after batch norm; any other value applies no
            activation. Default: "leaky".
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size=3,
                 stride=1,
                 groups=1,
                 padding=0,
                 act="leaky"):
        super(ConvBNLayer, self).__init__()

        conv_weight_attr = ParamAttr(
            initializer=fluid.initializer.Normal(0., 0.02))
        self.conv = Conv2D(
            num_channels=ch_in,
            num_filters=ch_out,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=groups,
            param_attr=conv_weight_attr,
            bias_attr=False,
            act=None)

        # Batch-norm scale and offset are excluded from weight decay.
        bn_scale_attr = ParamAttr(
            initializer=fluid.initializer.Normal(0., 0.02),
            regularizer=L2Decay(0.))
        bn_offset_attr = ParamAttr(
            initializer=fluid.initializer.Constant(0.0),
            regularizer=L2Decay(0.))
        self.batch_norm = BatchNorm(
            num_channels=ch_out,
            param_attr=bn_scale_attr,
            bias_attr=bn_offset_attr)

        self.act = act

    def forward(self, inputs):
        """Apply conv -> batch norm -> (optional) leaky ReLU to ``inputs``."""
        out = self.batch_norm(self.conv(inputs))
        if self.act != 'leaky':
            return out
        return fluid.layers.leaky_relu(x=out, alpha=0.1)
class DownSample(fluid.dygraph.Layer):
    """A single ConvBNLayer whose defaults (3x3, stride 2, padding 1) downsample.

    Args:
        ch_in (int): number of input channels.
        ch_out (int): number of output channels.
        filter_size (int): convolution kernel size. Default: 3.
        stride (int): convolution stride. Default: 2.
        padding (int): convolution padding. Default: 1.
    """

    def __init__(self, ch_in, ch_out, filter_size=3, stride=2, padding=1):
        super(DownSample, self).__init__()
        self.conv_bn_layer = ConvBNLayer(
            ch_in, ch_out, filter_size, stride, padding=padding)
        self.ch_out = ch_out

    def forward(self, inputs):
        """Return the output of the wrapped convolution block."""
        return self.conv_bn_layer(inputs)
class BasicBlock(fluid.dygraph.Layer):
    """DarkNet residual unit: 1x1 conv, 3x3 conv, then add the block input.

    Args:
        ch_in (int): number of input channels.
        ch_out (int): channels after the 1x1 convolution; the 3x3
            convolution outputs ``ch_out * 2`` channels, which are added
            element-wise to the input.
    """

    def __init__(self, ch_in, ch_out):
        super(BasicBlock, self).__init__()

        self.conv1 = ConvBNLayer(
            ch_in, ch_out, filter_size=1, stride=1, padding=0)
        self.conv2 = ConvBNLayer(
            ch_out, ch_out * 2, filter_size=3, stride=1, padding=1)

    def forward(self, inputs):
        """Return ``inputs + conv2(conv1(inputs))`` without activation."""
        residual = self.conv2(self.conv1(inputs))
        return fluid.layers.elementwise_add(x=inputs, y=residual, act=None)
class LayerWarp(fluid.dygraph.Layer):
    """A stage made of ``count`` BasicBlocks applied one after another.

    Args:
        ch_in (int): input channels of the first block.
        ch_out (int): base channel count; every block outputs
            ``ch_out * 2`` channels, which is also the input width of all
            blocks after the first.
        count (int): total number of blocks in the stage.
    """

    def __init__(self, ch_in, ch_out, count):
        super(LayerWarp, self).__init__()

        self.basicblock0 = BasicBlock(ch_in, ch_out)
        # The remaining blocks are registered as "basic_block_<i>" sublayers
        # and also kept in a list so forward() can walk them in order.
        self.res_out_list = [
            self.add_sublayer("basic_block_%d" % (block_id),
                              BasicBlock(ch_out * 2, ch_out))
            for block_id in range(1, count)
        ]
        self.ch_out = ch_out

    def forward(self, inputs):
        """Feed ``inputs`` through every block of the stage."""
        out = self.basicblock0(inputs)
        for block in self.res_out_list:
            out = block(out)
        return out
# Per supported depth: how many BasicBlocks each stage contains (DarkNet
# passes each entry to LayerWarp as its `count`).
DarkNet_cfg = {53: ([1, 2, 8, 8, 4])}
class DarkNet(Model):
    """DarkNet model from
    `"YOLOv3: An Incremental Improvement" <https://arxiv.org/abs/1804.02767>`_

    Args:
        num_layers (int): layer number of DarkNet, only 53 supported
            currently, default: 53.
        ch_in (int): channel number of input data, default 3.
    """

    def __init__(self, num_layers=53, ch_in=3):
        super(DarkNet, self).__init__()
        assert num_layers in DarkNet_cfg.keys(), \
            "only support num_layers in {} currently" \
            .format(DarkNet_cfg.keys())
        self.stages = DarkNet_cfg[num_layers][0:5]

        # Stem: 3x3 convolution followed by the first downsampling.
        self.conv0 = ConvBNLayer(
            ch_in=ch_in, ch_out=32, filter_size=3, stride=1, padding=1)
        self.downsample0 = DownSample(ch_in=32, ch_out=32 * 2)

        self.darknet53_conv_block_list = []
        self.downsample_list = []

        # Input channel count of each stage.
        stage_inputs = [64, 128, 256, 512, 1024]
        for idx, num_blocks in enumerate(self.stages):
            stage = self.add_sublayer(
                "stage_%d" % (idx),
                LayerWarp(int(stage_inputs[idx]), 32 * (2**idx), num_blocks))
            self.darknet53_conv_block_list.append(stage)

        # One downsample between consecutive stages (none after the last).
        for idx in range(len(self.stages) - 1):
            transition = self.add_sublayer(
                "stage_%d_downsample" % idx,
                DownSample(
                    ch_in=32 * (2**(idx + 1)), ch_out=32 * (2**(idx + 2))))
            self.downsample_list.append(transition)

    def forward(self, inputs):
        """Run the backbone and return the last three stage outputs.

        Returns:
            list: outputs of the final three stages, deepest stage first.
        """
        out = self.downsample0(self.conv0(inputs))

        last_stage = len(self.stages) - 1
        blocks = []
        for idx, stage in enumerate(self.darknet53_conv_block_list):
            out = stage(out)
            blocks.append(out)
            if idx < last_stage:
                out = self.downsample_list[idx](out)
        return blocks[-1:-4:-1]
def _darknet(num_layers=53, input_channels=3, pretrained=True):
    """Build a DarkNet and optionally load its pretrained weights.

    Args:
        num_layers (int): depth of the network. Default: 53.
        input_channels (int): channel number of input data. Default: 3.
        pretrained (bool): if True, fetch the weights registered in
            ``pretrain_infos`` and load them into the model. Default: True.

    Returns:
        DarkNet: the constructed model.
    """
    model = DarkNet(num_layers, input_channels)
    if not pretrained:
        return model

    assert num_layers in pretrain_infos.keys(), \
        "DarkNet{} do not have pretrained weights now, " \
        "pretrained should be set as False".format(num_layers)
    weight_path = get_weights_path(*(pretrain_infos[num_layers]))
    assert weight_path.endswith('.pdparams'), \
        "suffix of weight must be .pdparams"
    # model.load() is given the path without its ".pdparams" suffix.
    model.load(weight_path[:-len('.pdparams')])
    return model
def darknet53(input_channels=3, pretrained=True):
    """DarkNet 53-layer model

    Args:
        input_channels (int): channel number of input data, default 3.
        pretrained (bool): If True, returns a model pre-trained on ImageNet,
            default True.
    """
    return _darknet(
        num_layers=53, input_channels=input_channels, pretrained=pretrained)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from hapi.model import Model
from hapi.download import get_weights_path
# Names exported when this module is star-imported.
__all__ = ['MobileNetV1', 'mobilenet_v1']

# Pretrained weights keyed by architecture name; each value is a pair whose
# two items are passed, in order, to get_weights_path() by _mobilenet().
model_urls = {
    'mobilenetv1_1.0':
    ('https://paddle-hapi.bj.bcebos.com/models/mobilenet_v1_x1.0.pdparams',
     'bf0d25cb0bed1114d9dac9384ce2b4a6')
}
class ConvBNLayer(fluid.dygraph.Layer):
    """Bias-free Conv2D followed by BatchNorm with a fused activation.

    Args:
        num_channels (int): number of input channels.
        filter_size (int): convolution kernel size.
        num_filters (int): number of output channels.
        stride (int): convolution stride.
        padding (int): convolution padding.
        channels: accepted but not used by this layer.
        num_groups (int): convolution groups. Default: 1.
        act (str): activation applied by the batch norm. Default: 'relu'.
        use_cudnn (bool): forwarded to Conv2D. Default: True.
        name: accepted but not used by this layer.
    """

    def __init__(self,
                 num_channels,
                 filter_size,
                 num_filters,
                 stride,
                 padding,
                 channels=None,
                 num_groups=1,
                 act='relu',
                 use_cudnn=True,
                 name=None):
        super(ConvBNLayer, self).__init__()

        # All parameter names are derived from this layer's full name.
        prefix = self.full_name()
        bn_prefix = prefix + "_bn"

        self._conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=ParamAttr(
                initializer=MSRA(), name=prefix + "_weights"),
            bias_attr=False)

        self._batch_norm = BatchNorm(
            num_filters,
            act=act,
            param_attr=ParamAttr(name=bn_prefix + "_scale"),
            bias_attr=ParamAttr(name=bn_prefix + "_offset"),
            moving_mean_name=bn_prefix + '_mean',
            moving_variance_name=bn_prefix + '_variance')

    def forward(self, inputs):
        """Apply the convolution and then the batch norm to ``inputs``."""
        return self._batch_norm(self._conv(inputs))
class DepthwiseSeparable(fluid.dygraph.Layer):
    """A 3x3 grouped convolution followed by a 1x1 convolution.

    Args:
        num_channels (int): number of input channels.
        num_filters1 (int): unscaled output channels of the 3x3 convolution.
        num_filters2 (int): unscaled output channels of the 1x1 convolution.
        num_groups (int): unscaled group count of the 3x3 convolution.
        stride (int): stride of the 3x3 convolution.
        scale (float): multiplier applied to the filter and group counts.
        name: accepted but not used by this layer.
    """

    def __init__(self,
                 num_channels,
                 num_filters1,
                 num_filters2,
                 num_groups,
                 stride,
                 scale,
                 name=None):
        super(DepthwiseSeparable, self).__init__()

        mid_channels = int(num_filters1 * scale)

        self._depthwise_conv = ConvBNLayer(
            num_channels=num_channels,
            num_filters=mid_channels,
            filter_size=3,
            stride=stride,
            padding=1,
            num_groups=int(num_groups * scale),
            use_cudnn=False)

        self._pointwise_conv = ConvBNLayer(
            num_channels=mid_channels,
            filter_size=1,
            num_filters=int(num_filters2 * scale),
            stride=1,
            padding=0)

    def forward(self, inputs):
        """Run the 3x3 grouped convolution, then the 1x1 convolution."""
        return self._pointwise_conv(self._depthwise_conv(inputs))
class MobileNetV1(Model):
    """MobileNetV1 model from
    `"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" <https://arxiv.org/abs/1704.04861>`_.

    Args:
        scale (float): scale of channels in each layer. Default: 1.0.
        num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer
                            will not be defined. Default: 1000.
        with_pool (bool): use pool before the last fc layer or not. Default: True.
        classifier_activation (str): activation for the last fc layer. Default: 'softmax'.
    """

    def __init__(self,
                 scale=1.0,
                 num_classes=1000,
                 with_pool=True,
                 classifier_activation='softmax'):
        super(MobileNetV1, self).__init__()
        self.scale = scale
        self.dwsl = []
        self.num_classes = num_classes
        self.with_pool = with_pool

        self.conv1 = ConvBNLayer(
            num_channels=3,
            filter_size=3,
            channels=3,
            num_filters=int(32 * scale),
            stride=2,
            padding=1)

        # One entry per depthwise-separable block, in execution order:
        # (sublayer name, unscaled input channels, unscaled output channels,
        #  stride). The input channel count is also used as the unscaled
        # depthwise filter count and group count.
        block_specs = [
            ("conv2_1", 32, 64, 1),
            ("conv2_2", 64, 128, 2),
            ("conv3_1", 128, 128, 1),
            ("conv3_2", 128, 256, 2),
            ("conv4_1", 256, 256, 1),
            ("conv4_2", 256, 512, 2),
        ]
        block_specs += [("conv5_" + str(i + 1), 512, 512, 1)
                        for i in range(5)]
        block_specs += [
            ("conv5_6", 512, 1024, 2),
            ("conv6", 1024, 1024, 1),
        ]

        for block_name, in_channels, out_channels, stride in block_specs:
            block = self.add_sublayer(
                sublayer=DepthwiseSeparable(
                    num_channels=int(in_channels * scale),
                    num_filters1=in_channels,
                    num_filters2=out_channels,
                    num_groups=in_channels,
                    stride=stride,
                    scale=scale),
                name=block_name)
            self.dwsl.append(block)

        if with_pool:
            self.pool2d_avg = Pool2D(pool_type='avg', global_pooling=True)

        # The classifier exists only for a positive class count, matching
        # both the documented contract and the check in forward().
        if num_classes > 0:
            self.out = Linear(
                int(1024 * scale),
                num_classes,
                act=classifier_activation,
                param_attr=ParamAttr(
                    initializer=MSRA(), name=self.full_name() + "fc7_weights"),
                bias_attr=ParamAttr(name="fc7_offset"))

    def forward(self, inputs):
        """Run the network on ``inputs``.

        Returns the classifier output when ``num_classes > 0``; otherwise the
        (optionally pooled) feature map of the last block.
        """
        y = self.conv1(inputs)
        for dws in self.dwsl:
            y = dws(y)

        if self.with_pool:
            y = self.pool2d_avg(y)

        if self.num_classes > 0:
            # Flatten to the classifier's input width, which scales with
            # `scale` exactly like the Linear layer built in __init__.
            y = fluid.layers.reshape(y, shape=[-1, int(1024 * self.scale)])
            y = self.out(y)
        return y
def _mobilenet(arch, pretrained=False, **kwargs):
    """Build a 1000-class MobileNetV1 and optionally load pretrained weights.

    Args:
        arch (str): key looked up in ``model_urls`` when ``pretrained`` is set.
        pretrained (bool): if True, download and load the weights registered
            for ``arch``. Default: False.
        **kwargs: extra keyword arguments forwarded to ``MobileNetV1``.

    Returns:
        MobileNetV1: the constructed model.
    """
    model = MobileNetV1(num_classes=1000, with_pool=True, **kwargs)
    if not pretrained:
        return model

    assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format(
        arch)
    entry = model_urls[arch]
    weight_path = get_weights_path(entry[0], entry[1])
    assert weight_path.endswith(
        '.pdparams'), "suffix of weight must be .pdparams"
    # model.load() is given the path without its ".pdparams" suffix.
    model.load(weight_path[:-len('.pdparams')])
    return model
def mobilenet_v1(pretrained=False, scale=1.0):
    """MobileNetV1

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
        scale: (float): scale of channels in each layer. Default: 1.0.
    """
    # The weight registry is keyed by "mobilenetv1_<scale>".
    arch = 'mobilenetv1_' + str(scale)
    return _mobilenet(arch, pretrained, scale=scale)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from hapi.model import Model
from hapi.download import get_weights_path
# Names exported when this module is star-imported.
__all__ = ['MobileNetV2', 'mobilenet_v2']

# Pretrained weights keyed by architecture name; each value is a pair whose
# two items are passed, in order, to get_weights_path() by _mobilenet().
model_urls = {
    'mobilenetv2_1.0':
    ('https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams',
     '8ff74f291f72533f2a7956a4efff9d88')
}
class ConvBNLayer(fluid.dygraph.Layer):
    """Bias-free Conv2D followed by BatchNorm and an optional ReLU6.

    Args:
        num_channels (int): number of input channels.
        filter_size (int): convolution kernel size.
        num_filters (int): number of output channels.
        stride (int): convolution stride.
        padding (int): convolution padding.
        channels: accepted but not used by this layer.
        num_groups (int): convolution groups. Default: 1.
        use_cudnn (bool): forwarded to Conv2D. Default: True.
    """

    def __init__(self,
                 num_channels,
                 filter_size,
                 num_filters,
                 stride,
                 padding,
                 channels=None,
                 num_groups=1,
                 use_cudnn=True):
        super(ConvBNLayer, self).__init__()

        # All parameter names are derived from this layer's full name.
        prefix = self.full_name()
        bn_prefix = prefix + "_bn"

        conv_weight_attr = ParamAttr(name=prefix + "_weights")
        self._conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=conv_weight_attr,
            bias_attr=False)

        self._batch_norm = BatchNorm(
            num_filters,
            param_attr=ParamAttr(name=bn_prefix + "_scale"),
            bias_attr=ParamAttr(name=bn_prefix + "_offset"),
            moving_mean_name=bn_prefix + '_mean',
            moving_variance_name=bn_prefix + '_variance')

    def forward(self, inputs, if_act=True):
        """Apply conv and batch norm; add ReLU6 when ``if_act`` is true."""
        out = self._batch_norm(self._conv(inputs))
        if not if_act:
            return out
        return fluid.layers.relu6(out)
class InvertedResidualUnit(fluid.dygraph.Layer):
    """Inverted residual unit: 1x1 expand, grouped conv, 1x1 linear projection.

    Args:
        num_channels (int): number of input channels.
        num_in_filter (int): channel count the expansion factor is applied to.
        num_filters (int): number of output channels.
        stride (int): stride of the middle convolution.
        filter_size (int): kernel size of the middle convolution.
        padding (int): padding of the middle convolution.
        expansion_factor (float): multiplier giving the expanded width.
    """

    def __init__(
            self,
            num_channels,
            num_in_filter,
            num_filters,
            stride,
            filter_size,
            padding,
            expansion_factor, ):
        super(InvertedResidualUnit, self).__init__()

        expanded = int(round(num_in_filter * expansion_factor))

        self._expand_conv = ConvBNLayer(
            num_channels=num_channels,
            num_filters=expanded,
            filter_size=1,
            stride=1,
            padding=0,
            num_groups=1)

        # One group per channel for the spatial convolution.
        self._bottleneck_conv = ConvBNLayer(
            num_channels=expanded,
            num_filters=expanded,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            num_groups=expanded,
            use_cudnn=False)

        self._linear_conv = ConvBNLayer(
            num_channels=expanded,
            num_filters=num_filters,
            filter_size=1,
            stride=1,
            padding=0,
            num_groups=1)

    def forward(self, inputs, ifshortcut):
        """Run the unit; add ``inputs`` to the result when ``ifshortcut``."""
        out = self._expand_conv(inputs, if_act=True)
        out = self._bottleneck_conv(out, if_act=True)
        # The projection is linear: no activation after it.
        out = self._linear_conv(out, if_act=False)
        if not ifshortcut:
            return out
        return fluid.layers.elementwise_add(inputs, out)
class InvresiBlocks(fluid.dygraph.Layer):
    """A stage of ``n`` inverted residual units.

    The first unit maps ``in_c`` to ``c`` channels with stride ``s`` and has
    no shortcut; the remaining ``n - 1`` units keep ``c`` channels, use
    stride 1 and add their input back.

    Args:
        in_c (int): number of input channels of the stage.
        t (float): expansion factor of every unit.
        c (int): number of output channels of every unit.
        n (int): number of units in the stage.
        s (int): stride of the first unit.
    """

    def __init__(self, in_c, t, c, n, s):
        super(InvresiBlocks, self).__init__()

        self._first_block = InvertedResidualUnit(
            num_channels=in_c,
            num_in_filter=in_c,
            num_filters=c,
            stride=s,
            filter_size=3,
            padding=1,
            expansion_factor=t)

        self._inv_blocks = []
        # Follow-up units are numbered from 2 in their sublayer names.
        for unit_no in range(2, n + 1):
            unit = self.add_sublayer(
                sublayer=InvertedResidualUnit(
                    num_channels=c,
                    num_in_filter=c,
                    num_filters=c,
                    stride=1,
                    filter_size=3,
                    padding=1,
                    expansion_factor=t),
                name=self.full_name() + "_" + str(unit_no))
            self._inv_blocks.append(unit)

    def forward(self, inputs):
        """Feed ``inputs`` through every unit of the stage."""
        out = self._first_block(inputs, ifshortcut=False)
        for unit in self._inv_blocks:
            out = unit(out, ifshortcut=True)
        return out
class MobileNetV2(Model):
    """MobileNetV2 model from
    `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.

    Args:
        scale (float): scale of channels in each layer. Default: 1.0.
        num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer
                            will not be defined. Default: 1000.
        with_pool (bool): use pool before the last fc layer or not. Default: True.
        classifier_activation (str): activation for the last fc layer. Default: 'softmax'.
    """

    def __init__(self,
                 scale=1.0,
                 num_classes=1000,
                 with_pool=True,
                 classifier_activation='softmax'):
        super(MobileNetV2, self).__init__()
        self.scale = scale
        self.num_classes = num_classes
        self.with_pool = with_pool

        # Per stage: (expansion factor t, unscaled output channels c,
        # number of units n, stride of the first unit s).
        bottleneck_params_list = [
            (1, 16, 1, 1),
            (6, 24, 2, 2),
            (6, 32, 3, 2),
            (6, 64, 4, 2),
            (6, 96, 3, 1),
            (6, 160, 3, 2),
            (6, 320, 1, 1),
        ]

        stem_channels = int(32 * scale)
        self._conv1 = ConvBNLayer(
            num_channels=3,
            num_filters=stem_channels,
            filter_size=3,
            stride=2,
            padding=1)

        self._invl = []
        in_c = stem_channels
        # Stages are registered as "conv2", "conv3", ... in order.
        for conv_no, (t, c, n, s) in enumerate(
                bottleneck_params_list, start=2):
            stage_channels = int(c * scale)
            stage = self.add_sublayer(
                sublayer=InvresiBlocks(
                    in_c=in_c, t=t, c=stage_channels, n=n, s=s),
                name='conv' + str(conv_no))
            self._invl.append(stage)
            in_c = stage_channels

        # The final width only grows with scale; it never drops below 1280.
        if scale > 1.0:
            self._out_c = int(1280 * scale)
        else:
            self._out_c = 1280
        self._conv9 = ConvBNLayer(
            num_channels=in_c,
            num_filters=self._out_c,
            filter_size=1,
            stride=1,
            padding=0)

        if with_pool:
            self._pool2d_avg = Pool2D(pool_type='avg', global_pooling=True)

        if num_classes > 0:
            fc_weight_attr = ParamAttr(name=self.full_name() + "fc10_weights")
            self._fc = Linear(
                self._out_c,
                num_classes,
                act=classifier_activation,
                param_attr=fc_weight_attr,
                bias_attr=ParamAttr(name="fc10_offset"))

    def forward(self, inputs):
        """Run the network on ``inputs``.

        Returns the classifier output when ``num_classes > 0``; otherwise the
        (optionally pooled) output of the last convolution.
        """
        out = self._conv1(inputs, if_act=True)
        for stage in self._invl:
            out = stage(out)
        out = self._conv9(out, if_act=True)

        if self.with_pool:
            out = self._pool2d_avg(out)

        if self.num_classes > 0:
            out = fluid.layers.reshape(out, shape=[-1, self._out_c])
            out = self._fc(out)
        return out
def _mobilenet(arch, pretrained=False, **kwargs):
    """Build a 1000-class MobileNetV2 and optionally load pretrained weights.

    Args:
        arch (str): key looked up in ``model_urls`` when ``pretrained`` is set.
        pretrained (bool): if True, download and load the weights registered
            for ``arch``. Default: False.
        **kwargs: extra keyword arguments forwarded to ``MobileNetV2``.

    Returns:
        MobileNetV2: the constructed model.
    """
    model = MobileNetV2(num_classes=1000, with_pool=True, **kwargs)
    if not pretrained:
        return model

    assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format(
        arch)
    entry = model_urls[arch]
    weight_path = get_weights_path(entry[0], entry[1])
    assert weight_path.endswith(
        '.pdparams'), "suffix of weight must be .pdparams"
    # model.load() is given the path without its ".pdparams" suffix.
    model.load(weight_path[:-len('.pdparams')])
    return model
def mobilenet_v2(pretrained=False, scale=1.0):
    """MobileNetV2

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
        scale: (float): scale of channels in each layer. Default: 1.0.
    """
    # The weight registry is keyed by "mobilenetv2_<scale>".
    arch = 'mobilenetv2_' + str(scale)
    return _mobilenet(arch, pretrained, scale=scale)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import math
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.container import Sequential
from hapi.model import Model
from hapi.download import get_weights_path
# Names exported when this module is star-imported.
__all__ = [
    'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152'
]

# Pretrained weights keyed by architecture name; each value is a pair whose
# two items are passed, in order, to get_weights_path() by _resnet().
# Only resnet50 has an entry here.
model_urls = {
    'resnet50': ('https://paddle-hapi.bj.bcebos.com/models/resnet50.pdparams',
                 '0884c9087266496c41c60d14a96f8530')
}
class ConvBNLayer(fluid.dygraph.Layer):
    """Bias-free Conv2D followed by BatchNorm with an optional activation.

    Args:
        num_channels (int): number of input channels.
        num_filters (int): number of output channels.
        filter_size (int): convolution kernel size; the padding is derived
            from it as ``(filter_size - 1) // 2``.
        stride (int): convolution stride. Default: 1.
        groups (int): convolution groups. Default: 1.
        act (str): activation applied by the batch norm. Default: None.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 groups=1,
                 act=None):
        super(ConvBNLayer, self).__init__()

        half_kernel = (filter_size - 1) // 2
        self._conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=half_kernel,
            groups=groups,
            act=None,
            bias_attr=False)

        self._batch_norm = BatchNorm(num_filters, act=act)

    def forward(self, inputs):
        """Apply the convolution and then the batch norm to ``inputs``."""
        return self._batch_norm(self._conv(inputs))
class BasicBlock(fluid.dygraph.Layer):
    """ResNet basic block: two 3x3 conv-bn layers plus a shortcut branch.

    Args:
        num_channels (int): number of input channels.
        num_filters (int): number of output channels.
        stride (int): stride of the second convolution and of the projection
            shortcut.
        shortcut (bool): if True the input is added unchanged; otherwise it
            first goes through a 1x1 projection. Default: True.
    """

    # Ratio between a block's output channels and ``num_filters``.
    expansion = 1

    def __init__(self, num_channels, num_filters, stride, shortcut=True):
        super(BasicBlock, self).__init__()

        self.conv0 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=3,
            act='relu')
        self.conv1 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu')

        # The projection is only created when the identity cannot be used.
        if not shortcut:
            self.short = ConvBNLayer(
                num_channels=num_channels,
                num_filters=num_filters,
                filter_size=1,
                stride=stride)

        self.shortcut = shortcut

    def forward(self, inputs):
        """Return ``relu(shortcut(inputs) + conv1(conv0(inputs)))``."""
        out = self.conv1(self.conv0(inputs))
        identity = inputs if self.shortcut else self.short(inputs)
        return fluid.layers.relu(identity + out)
class BottleneckBlock(fluid.dygraph.Layer):
    """ResNet bottleneck block: 1x1, 3x3, 1x1 conv-bn layers plus a shortcut.

    Args:
        num_channels (int): number of input channels.
        num_filters (int): width of the first two convolutions; the block
            outputs ``num_filters * expansion`` channels.
        stride (int): stride of the 3x3 convolution and of the projection
            shortcut.
        shortcut (bool): if True the input is added unchanged; otherwise it
            first goes through a 1x1 projection. Default: True.
    """

    # Ratio between a block's output channels and ``num_filters``.
    expansion = 4

    def __init__(self, num_channels, num_filters, stride, shortcut=True):
        super(BottleneckBlock, self).__init__()

        out_channels = num_filters * self.expansion

        self.conv0 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            act='relu')
        self.conv1 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu')
        self.conv2 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=out_channels,
            filter_size=1,
            act=None)

        # The projection is only created when the identity cannot be used.
        if not shortcut:
            self.short = ConvBNLayer(
                num_channels=num_channels,
                num_filters=out_channels,
                filter_size=1,
                stride=stride)

        self.shortcut = shortcut
        self._num_channels_out = out_channels

    def forward(self, inputs):
        """Return ``relu(shortcut(inputs) + conv2(conv1(conv0(inputs))))``."""
        out = self.conv2(self.conv1(self.conv0(inputs)))
        identity = inputs if self.shortcut else self.short(inputs)
        summed = fluid.layers.elementwise_add(x=identity, y=out)
        return fluid.layers.relu(summed)
class ResNet(Model):
    """ResNet model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

    Args:
        Block (BasicBlock|BottleneckBlock): block module of model.
        depth (int): layers of resnet, default: 50.
        num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer
                            will not be defined. Default: 1000.
        with_pool (bool): use pool before the last fc layer or not. Default: True.
        classifier_activation (str): activation for the last fc layer. Default: 'softmax'.
    """

    def __init__(self,
                 Block,
                 depth=50,
                 num_classes=1000,
                 with_pool=True,
                 classifier_activation='softmax'):
        super(ResNet, self).__init__()

        self.num_classes = num_classes
        self.with_pool = with_pool

        # Number of blocks in each of the four stages, per supported depth.
        layer_config = {
            18: [2, 2, 2, 2],
            34: [3, 4, 6, 3],
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3],
        }
        assert depth in layer_config.keys(), \
            "supported depth are {} but input layer is {}".format(
                layer_config.keys(), depth)

        layers = layer_config[depth]

        in_channels = 64
        out_channels = [64, 128, 256, 512]

        # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max pool.
        self.conv = ConvBNLayer(
            num_channels=3,
            num_filters=64,
            filter_size=7,
            stride=2,
            act='relu')
        self.pool = Pool2D(
            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')

        self.layers = []
        for idx, num_blocks in enumerate(layers):
            blocks = []
            # Only the first block of a stage uses a projection shortcut.
            shortcut = False
            for b in range(num_blocks):
                # From the second block on, the input width is the stage's
                # expanded output width; this value also carries over as the
                # input width of the next stage's first block.
                if b == 1:
                    in_channels = out_channels[idx] * Block.expansion
                block = Block(
                    num_channels=in_channels,
                    num_filters=out_channels[idx],
                    # Every stage except the first downsamples in block 0.
                    stride=2 if b == 0 and idx != 0 else 1,
                    shortcut=shortcut)
                blocks.append(block)
                shortcut = True
            layer = self.add_sublayer("layer_{}".format(idx),
                                      Sequential(*blocks))
            self.layers.append(layer)

        if with_pool:
            self.global_pool = Pool2D(
                pool_size=7, pool_type='avg', global_pooling=True)

        if num_classes > 0:
            stdv = 1.0 / math.sqrt(out_channels[-1] * Block.expansion * 1.0)
            self.fc_input_dim = out_channels[-1] * Block.expansion * 1 * 1
            self.fc = Linear(
                self.fc_input_dim,
                num_classes,
                act=classifier_activation,
                param_attr=fluid.param_attr.ParamAttr(
                    initializer=fluid.initializer.Uniform(-stdv, stdv)))

    def forward(self, inputs):
        """Run the network on ``inputs``.

        Returns the classifier output when ``num_classes > 0``; otherwise the
        (optionally pooled) feature map of the last stage.
        """
        x = self.conv(inputs)
        x = self.pool(x)
        for layer in self.layers:
            x = layer(x)

        if self.with_pool:
            x = self.global_pool(x)

        # Same condition as in __init__: `fc` and `fc_input_dim` only exist
        # for a positive class count, so num_classes == 0 must skip them.
        if self.num_classes > 0:
            x = fluid.layers.reshape(x, shape=[-1, self.fc_input_dim])
            x = self.fc(x)
        return x
def _resnet(arch, Block, depth, pretrained):
    """Build a 1000-class ResNet and optionally load pretrained weights.

    Args:
        arch (str): key looked up in ``model_urls`` when ``pretrained`` is set.
        Block (BasicBlock|BottleneckBlock): block class used by the model.
        depth (int): depth passed to ``ResNet``.
        pretrained (bool): if True, download and load the weights registered
            for ``arch``.

    Returns:
        ResNet: the constructed model.
    """
    model = ResNet(Block, depth, num_classes=1000, with_pool=True)
    if not pretrained:
        return model

    assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format(
        arch)
    entry = model_urls[arch]
    weight_path = get_weights_path(entry[0], entry[1])
    assert weight_path.endswith(
        '.pdparams'), "suffix of weight must be .pdparams"
    # model.load() is given the path without its ".pdparams" suffix.
    model.load(weight_path[:-len('.pdparams')])
    return model
def resnet18(pretrained=False):
    """ResNet 18-layer model

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    return _resnet(
        arch='resnet18', Block=BasicBlock, depth=18, pretrained=pretrained)
def resnet34(pretrained=False):
    """ResNet 34-layer model

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    return _resnet(
        arch='resnet34', Block=BasicBlock, depth=34, pretrained=pretrained)
def resnet50(pretrained=False):
    """ResNet 50-layer model

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    return _resnet(
        arch='resnet50',
        Block=BottleneckBlock,
        depth=50,
        pretrained=pretrained)
def resnet101(pretrained=False):
    """ResNet 101-layer model

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    return _resnet(
        arch='resnet101',
        Block=BottleneckBlock,
        depth=101,
        pretrained=pretrained)
def resnet152(pretrained=False):
    """ResNet 152-layer model

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    return _resnet(
        arch='resnet152',
        Block=BottleneckBlock,
        depth=152,
        pretrained=pretrained)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import math
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from hapi.model import Model
from hapi.download import get_weights_path
# Names exported when this module is star-imported.
__all__ = ["TSM_ResNet", "tsm_resnet50"]

# Pretrained weights, keyed by backbone depth: {num_layers: (url, md5)}.
# The tuple is unpacked straight into get_weights_path() by _tsm_resnet().
pretrain_infos = {
    50: ('https://paddlemodels.bj.bcebos.com/hapi/tsm_resnet50.pdparams',
         '5755dc538e422589f417f7b38d7cc3c7')
}
class ConvBNLayer(fluid.dygraph.Layer):
    """Bias-free Conv2D followed by BatchNorm with an optional activation.

    Args:
        num_channels (int): number of input channels.
        num_filters (int): number of output channels.
        filter_size (int): convolution kernel size; the padding is derived
            from it as ``(filter_size - 1) // 2``.
        stride (int): convolution stride. Default: 1.
        groups (int): convolution groups. Default: 1.
        act (str): activation applied by the batch norm. Default: None.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 groups=1,
                 act=None):
        super(ConvBNLayer, self).__init__()

        self._conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            # Honour the caller's `groups` instead of silently dropping it;
            # the default of 1 is an ordinary, ungrouped convolution.
            groups=groups,
            act=None,
            param_attr=fluid.param_attr.ParamAttr(),
            bias_attr=False)

        self._batch_norm = BatchNorm(
            num_filters,
            act=act,
            param_attr=fluid.param_attr.ParamAttr(),
            bias_attr=fluid.param_attr.ParamAttr())

    def forward(self, inputs):
        """Apply the convolution and then the batch norm to ``inputs``."""
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y
class BottleneckBlock(fluid.dygraph.Layer):
    """ResNet bottleneck block preceded by a temporal shift of its input.

    Args:
        num_channels (int): number of input channels.
        num_filters (int): width of the first two convolutions; the block
            outputs ``num_filters * 4`` channels.
        stride (int): stride of the 3x3 convolution and of the projection
            shortcut.
        shortcut (bool): if True the un-shifted input is added unchanged;
            otherwise it first goes through a 1x1 projection. Default: True.
        seg_num (int): segment number handed to ``temporal_shift``.
            Default: 8.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 shortcut=True,
                 seg_num=8):
        super(BottleneckBlock, self).__init__()

        expanded = num_filters * 4

        self.conv0 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            act='relu')
        self.conv1 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu')
        self.conv2 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=expanded,
            filter_size=1,
            act=None)

        # The projection is only created when the identity cannot be used.
        if not shortcut:
            self.short = ConvBNLayer(
                num_channels=num_channels,
                num_filters=expanded,
                filter_size=1,
                stride=stride)

        self.shortcut = shortcut
        self.seg_num = seg_num
        self._num_channels_out = int(num_filters * 4)

    def forward(self, inputs):
        """Shift, run the three convolutions, add the shortcut, apply ReLU."""
        shifted = fluid.layers.temporal_shift(inputs, self.seg_num, 1.0 / 8)
        out = self.conv2(self.conv1(self.conv0(shifted)))
        # The shortcut branch uses the original, un-shifted input.
        identity = inputs if self.shortcut else self.short(inputs)
        return fluid.layers.elementwise_add(x=identity, y=out, act="relu")
class TSM_ResNet(Model):
    """
    TSM network with ResNet as backbone

    Args:
        num_layers (int): ResNet layer number, only support 50 currently.
            Default 50.
        seg_num (int): segment number of each video sample. Default 8.
        num_classes (int): video class number. Default 400.
    """

    def __init__(self, num_layers=50, seg_num=8, num_classes=400):
        super(TSM_ResNet, self).__init__()

        self.layers = num_layers
        self.seg_num = seg_num
        self.class_dim = num_classes

        if self.layers != 50:
            raise NotImplementedError
        # Number of bottleneck blocks in each of the four stages.
        depth = [3, 4, 6, 3]
        num_filters = [64, 128, 256, 512]

        self.conv = ConvBNLayer(
            num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu')
        self.pool2d_max = Pool2D(
            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')

        self.bottleneck_block_list = []
        num_channels = 64
        for stage, num_blocks in enumerate(depth):
            # Only the first block of a stage uses a projection shortcut.
            shortcut = False
            for block_id in range(num_blocks):
                # Every stage except the first downsamples in its first block.
                stride = 2 if block_id == 0 and stage != 0 else 1
                bottleneck_block = self.add_sublayer(
                    'bb_%d_%d' % (stage, block_id),
                    BottleneckBlock(
                        num_channels=num_channels,
                        num_filters=num_filters[stage],
                        stride=stride,
                        shortcut=shortcut,
                        seg_num=self.seg_num))
                num_channels = int(bottleneck_block._num_channels_out)
                self.bottleneck_block_list.append(bottleneck_block)
                shortcut = True

        self.pool2d_avg = Pool2D(
            pool_size=7, pool_type='avg', global_pooling=True)

        stdv = 1.0 / math.sqrt(2048 * 1.0)
        fc_weight_attr = fluid.param_attr.ParamAttr(
            initializer=fluid.initializer.Uniform(-stdv, stdv))
        fc_bias_attr = fluid.param_attr.ParamAttr(
            learning_rate=2.0, regularizer=fluid.regularizer.L2Decay(0.))
        self.out = Linear(
            2048,
            self.class_dim,
            act="softmax",
            param_attr=fc_weight_attr,
            bias_attr=fc_bias_attr)

    def forward(self, inputs):
        """Classify ``inputs``.

        ``inputs`` is indexed up to dimension 4; its leading dimensions are
        merged so that every segment goes through the backbone as one image,
        and the per-segment features are averaged before the classifier.
        """
        frames = fluid.layers.reshape(
            inputs, [-1, inputs.shape[2], inputs.shape[3], inputs.shape[4]])
        out = self.pool2d_max(self.conv(frames))
        for bottleneck_block in self.bottleneck_block_list:
            out = bottleneck_block(out)
        out = self.pool2d_avg(out)
        out = fluid.layers.dropout(out, dropout_prob=0.5)
        # Regroup the segments of each sample and average over them.
        out = fluid.layers.reshape(out, [-1, self.seg_num, out.shape[1]])
        out = fluid.layers.reduce_mean(out, dim=1)
        out = fluid.layers.reshape(out, shape=[-1, 2048])
        return self.out(out)
def _tsm_resnet(num_layers, seg_num=8, num_classes=400, pretrained=True):
    """Create a TSM_ResNet and optionally load its pre-trained weights.

    Args:
        num_layers (int): ResNet layer number, forwarded to TSM_ResNet and
            used as the key into ``pretrain_infos``.
        seg_num (int): segment number of each video sample. Default 8.
        num_classes (int): video class number. Default 400.
        pretrained (bool): If True, fetch the weights registered in
            ``pretrain_infos`` and load them into the model. Default True.

    Returns:
        TSM_ResNet: the constructed (and possibly weight-loaded) model.
    """
    model = TSM_ResNet(num_layers, seg_num, num_classes)
    if not pretrained:
        return model

    assert num_layers in pretrain_infos, \
        "TSM-ResNet{} do not have pretrained weights now, " \
        "pretrained should be set as False".format(num_layers)
    weight_path = get_weights_path(*pretrain_infos[num_layers])
    assert weight_path.endswith('.pdparams'), \
        "suffix of weight must be .pdparams"
    # model.load is given the path with the '.pdparams' suffix stripped.
    model.load(weight_path[:-len('.pdparams')])
    return model
def tsm_resnet50(seg_num=8, num_classes=400, pretrained=True):
    """TSM model with 50-layer ResNet as backbone

    Args:
        seg_num (int): segment number of each video sample. Default 8.
        num_classes (int): video class number. Default 400.
        pretrained (bool): If True, returns a model with the pre-trained
            weights registered for the 50-layer backbone loaded,
            default True
    """
    return _tsm_resnet(
        num_layers=50,
        seg_num=seg_num,
        num_classes=num_classes,
        pretrained=pretrained)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.container import Sequential
from hapi.model import Model
from hapi.download import get_weights_path
# Public names of this module.
__all__ = [
'VGG',
'vgg11',
'vgg13',
'vgg16',
'vgg19',
]
# Pre-trained weights registry, keyed by architecture name. Each value is a
# 2-tuple whose items are passed, in order, to get_weights_path: the download
# URL and (presumably) the md5 checksum of the file -- TODO confirm.
# Only 'vgg16' has an entry, so requesting pretrained weights for any other
# architecture fails the assertion in _vgg.
model_urls = {
'vgg16': ('https://paddle-hapi.bj.bcebos.com/models/vgg16.pdparams',
'c788f453a3b999063e8da043456281ee')
}
class Classifier(fluid.dygraph.Layer):
    """Three-layer fully-connected head used by VGG.

    Args:
        num_classes (int): output size of the last Linear layer.
        classifier_activation (str): activation of the last Linear layer.
            Default: 'softmax'.
    """

    def __init__(self, num_classes, classifier_activation='softmax'):
        super(Classifier, self).__init__()
        hidden_dim = 4096
        self.linear1 = Linear(512 * 7 * 7, hidden_dim)
        self.linear2 = Linear(hidden_dim, hidden_dim)
        self.linear3 = Linear(
            hidden_dim, num_classes, act=classifier_activation)

    def forward(self, x):
        """Apply linear -> relu -> dropout twice, then the output layer."""
        for hidden in (self.linear1, self.linear2):
            x = hidden(x)
            x = fluid.layers.relu(x)
            x = fluid.layers.dropout(x, 0.5)
        return self.linear3(x)
class VGG(Model):
    """VGG model from
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_

    Args:
        features (fluid.dygraph.Layer): vgg features created by function
            make_layers.
        num_classes (int): output dim of last fc layer. If num_classes <= 0,
            the classifier head is not defined and forward returns the
            feature output directly. Default: 1000.
        classifier_activation (str): activation for the last fc layer.
            Default: 'softmax'.
    """

    def __init__(self,
                 features,
                 num_classes=1000,
                 classifier_activation='softmax'):
        super(VGG, self).__init__()
        self.features = features
        self.num_classes = num_classes

        if num_classes > 0:
            head = Sequential(Classifier(num_classes, classifier_activation))
            self.classifier = self.add_sublayer("classifier", head)

    def forward(self, x):
        """Extract features and, when a head exists, classify them."""
        out = self.features(x)
        if self.num_classes > 0:
            # Flatten from axis 1 before the fully-connected head.
            out = self.classifier(fluid.layers.flatten(out, 1))
        return out
def make_layers(cfg, batch_norm=False):
    """Build the VGG feature extractor described by ``cfg``.

    Args:
        cfg (list): sequence whose items are either the string 'M', which
            adds a 2x2 stride-2 Pool2D, or an int giving the number of
            filters of a 3x3 convolution with padding 1.
        batch_norm (bool): If True, each convolution is followed by a
            BatchNorm layer carrying the relu activation; otherwise the
            convolution itself uses relu. Default: False.

    Returns:
        Sequential: the layers in the order listed in ``cfg``.
    """
    stack = []
    channels_in = 3
    for item in cfg:
        if item == 'M':
            stack.append(Pool2D(pool_size=2, pool_stride=2))
            continue

        if batch_norm:
            stack.append(Conv2D(channels_in, item, filter_size=3, padding=1))
            stack.append(BatchNorm(item, act='relu'))
        else:
            stack.append(
                Conv2D(
                    channels_in, item, filter_size=3, padding=1, act='relu'))
        # The next convolution consumes this convolution's output channels.
        channels_in = item
    return Sequential(*stack)
# Layer layouts consumed by make_layers. An int is the filter count of a 3x3
# convolution; 'M' inserts a 2x2 stride-2 Pool2D.
# Keys as used by the factory functions in this file:
# 'A' -> vgg11, 'B' -> vgg13, 'D' -> vgg16, 'E' -> vgg19.
cfgs = {
'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
'B':
[64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
'D': [
64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M',
512, 512, 512, 'M'
],
'E': [
64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512,
512, 'M', 512, 512, 512, 512, 'M'
],
}
def _vgg(arch, cfg, batch_norm, pretrained, **kwargs):
    """Create a VGG model and optionally load its pre-trained weights.

    Args:
        arch (str): architecture name, used as the key into ``model_urls``.
        cfg (str): key into ``cfgs`` selecting the layer layout.
        batch_norm (bool): forwarded to make_layers.
        pretrained (bool): If True, fetch the weights registered for
            ``arch`` and load them into the model.
        **kwargs: extra keyword arguments forwarded to VGG. ``num_classes``
            defaults to 1000 when not supplied.

    Returns:
        VGG: the constructed (and possibly weight-loaded) model.
    """
    # Treat 1000 as a default rather than passing it explicitly next to
    # **kwargs: the explicit form raised "got multiple values for keyword
    # argument 'num_classes'" whenever a caller supplied num_classes.
    kwargs.setdefault('num_classes', 1000)
    model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs)

    if pretrained:
        assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format(
            arch)
        weight_path = get_weights_path(model_urls[arch][0],
                                       model_urls[arch][1])
        assert weight_path.endswith(
            '.pdparams'), "suffix of weight must be .pdparams"
        # model.load is given the path with the 9-character '.pdparams'
        # suffix stripped.
        model.load(weight_path[:-9])

    return model
def vgg11(pretrained=False, batch_norm=False):
    """VGG 11-layer model (layout 'A')

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
        batch_norm (bool): If True, returns a model with batch_norm layer. Default: False.
    """
    arch = 'vgg11_bn' if batch_norm else 'vgg11'
    return _vgg(arch, 'A', batch_norm, pretrained)
def vgg13(pretrained=False, batch_norm=False):
    """VGG 13-layer model (layout 'B')

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
        batch_norm (bool): If True, returns a model with batch_norm layer. Default: False.
    """
    arch = 'vgg13_bn' if batch_norm else 'vgg13'
    return _vgg(arch, 'B', batch_norm, pretrained)
def vgg16(pretrained=False, batch_norm=False):
    """VGG 16-layer model (layout 'D')

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
        batch_norm (bool): If True, returns a model with batch_norm layer. Default: False.
    """
    arch = 'vgg16_bn' if batch_norm else 'vgg16'
    return _vgg(arch, 'D', batch_norm, pretrained)
def vgg19(pretrained=False, batch_norm=False):
    """VGG 19-layer model (layout 'E')

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
        batch_norm (bool): If True, returns a model with batch_norm layer. Default: False.
    """
    arch = 'vgg19_bn' if batch_norm else 'vgg19'
    return _vgg(arch, 'E', batch_norm, pretrained)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay
from hapi.model import Model, Loss
from hapi.download import get_weights_path
from .darknet import darknet53, ConvBNLayer
# Public names of this module.
__all__ = ['YoloLoss', 'YOLOv3', 'yolov3_darknet53']
# Pre-trained weights registry: {num_layers: (url, md5)}.
# The tuple is unpacked as the positional arguments of get_weights_path;
# only the 53-layer DarkNet backbone has an entry.
pretrain_infos = {
53: ('https://paddlemodels.bj.bcebos.com/hapi/yolov3_darknet53.pdparams',
'aed7dd45124ff2e844ae3bd5ba6c91d2')
}
class YoloDetectionBlock(fluid.dygraph.Layer):
    """Stack of alternating 1x1 and 3x3 ConvBNLayers used by each YOLOv3 head.

    Args:
        ch_in (int): number of input channels of the first convolution.
        channel (int): output channels of the 1x1 convolutions; the 3x3
            convolutions output ``channel * 2``. Must be even.
    """

    def __init__(self, ch_in, channel):
        super(YoloDetectionBlock, self).__init__()
        assert channel % 2 == 0, \
            "channel {} cannot be divided by 2".format(channel)
        wide = channel * 2

        def squeeze(in_ch):
            # 1x1 convolution producing `channel` feature maps.
            return ConvBNLayer(
                ch_in=in_ch,
                ch_out=channel,
                filter_size=1,
                stride=1,
                padding=0)

        def expand():
            # 3x3 convolution producing `channel * 2` feature maps.
            return ConvBNLayer(
                ch_in=channel,
                ch_out=wide,
                filter_size=3,
                stride=1,
                padding=1)

        # Layers are created in the same order as they are applied.
        self.conv0 = squeeze(ch_in)
        self.conv1 = expand()
        self.conv2 = squeeze(wide)
        self.conv3 = expand()
        self.route = squeeze(wide)
        self.tip = expand()

    def forward(self, inputs):
        """Return ``(route, tip)``, where ``tip`` is computed from ``route``."""
        feat = inputs
        for conv in (self.conv0, self.conv1, self.conv2, self.conv3):
            feat = conv(feat)
        route = self.route(feat)
        return route, self.tip(route)
class YOLOv3(Model):
    """YOLOv3 model from
    `"YOLOv3: An Incremental Improvement" <https://arxiv.org/abs/1804.02767>`_

    Args:
        num_classes (int): class number, default 80.
        model_mode (str): 'train', 'eval' or 'test' mode (matched
            case-insensitively). The network structure differs in the
            output layer and data: in 'train' mode no output layer is
            appended; in 'eval' and 'test' mode the output feature maps
            are decoded to predictions by 'fluid.layers.yolo_box'. 'eval'
            mode returns feature maps and predictions, 'test' mode only
            returns predictions. Default 'train'.
    """

    def __init__(self, num_classes=80, model_mode='train'):
        super(YOLOv3, self).__init__()
        self.num_classes = num_classes
        assert str.lower(model_mode) in ['train', 'eval', 'test'], \
            "model_mode should be 'train' 'eval' or 'test', but got " \
            "{}".format(model_mode)
        self.model_mode = str.lower(model_mode)
        # Flat list of anchor (width, height) pairs; anchor_masks selects
        # which pairs each of the three output levels uses.
        self.anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45,
                        59, 119, 116, 90, 156, 198, 373, 326]
        self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
        self.valid_thresh = 0.005
        self.nms_thresh = 0.45
        self.nms_topk = 400
        self.nms_posk = 100
        self.draw_thresh = 0.5

        # Compare the normalized mode: the assertion above accepts any
        # casing (e.g. 'Train'), so comparing the raw argument would
        # silently skip the pre-trained backbone for such inputs.
        self.backbone = darknet53(pretrained=(self.model_mode == 'train'))

        self.block_outputs = []
        self.yolo_blocks = []
        self.route_blocks = []

        for idx, num_chan in enumerate([1024, 768, 384]):
            # NOTE: the sublayer name keeps its original spelling
            # ("detecton"); renaming it would change the registered
            # sublayer name, which may break loading of saved weights.
            yolo_block = self.add_sublayer(
                "yolo_detecton_block_{}".format(idx),
                YoloDetectionBlock(num_chan, 512 // (2**idx)))
            self.yolo_blocks.append(yolo_block)

            # One (num_classes + 5)-sized prediction per anchor of the level.
            num_filters = len(self.anchor_masks[idx]) * (self.num_classes + 5)

            block_out = self.add_sublayer(
                "block_out_{}".format(idx),
                Conv2D(num_channels=1024 // (2**idx),
                       num_filters=num_filters,
                       filter_size=1,
                       act=None,
                       param_attr=ParamAttr(
                           initializer=fluid.initializer.Normal(0., 0.02)),
                       bias_attr=ParamAttr(
                           initializer=fluid.initializer.Constant(0.0),
                           regularizer=L2Decay(0.))))
            self.block_outputs.append(block_out)

            # The last level has no following level to route features to.
            if idx < 2:
                route = self.add_sublayer(
                    "route2_{}".format(idx),
                    ConvBNLayer(ch_in=512 // (2**idx),
                                ch_out=256 // (2**idx),
                                filter_size=1,
                                act='leaky_relu'))
                self.route_blocks.append(route)

    def forward(self, img_id, img_shape, inputs):
        """Compute per-level outputs and, outside 'train' mode, predictions.

        Args:
            img_id: image identifier, returned unchanged as the first item
                of the predictions.
            img_shape: passed to 'fluid.layers.yolo_box' as ``img_size``.
            inputs: image tensor fed to the backbone.

        Returns:
            list: in 'train' mode, the per-level output feature maps; in
            'test' mode, ``[img_id, nms_result]``; in 'eval' mode, the
            feature maps followed by ``[img_id, nms_result]``.
        """
        outputs = []
        boxes = []
        scores = []
        downsample = 32

        feats = self.backbone(inputs)
        route = None
        for idx, feat in enumerate(feats):
            if idx > 0:
                # Fuse the upsampled route of the previous level.
                feat = fluid.layers.concat(input=[route, feat], axis=1)
            route, tip = self.yolo_blocks[idx](feat)
            block_out = self.block_outputs[idx](tip)
            outputs.append(block_out)

            if idx < 2:
                route = self.route_blocks[idx](route)
                route = fluid.layers.resize_nearest(route, scale=2)

            if self.model_mode != 'train':
                # Collect the (w, h) pairs selected by this level's mask.
                anchor_mask = self.anchor_masks[idx]
                mask_anchors = []
                for m in anchor_mask:
                    mask_anchors.append(self.anchors[2 * m])
                    mask_anchors.append(self.anchors[2 * m + 1])
                b, s = fluid.layers.yolo_box(
                    x=block_out,
                    img_size=img_shape,
                    anchors=mask_anchors,
                    class_num=self.num_classes,
                    conf_thresh=self.valid_thresh,
                    downsample_ratio=downsample)

                boxes.append(b)
                scores.append(fluid.layers.transpose(s, perm=[0, 2, 1]))

            # Each following level is downsampled half as much.
            downsample //= 2

        if self.model_mode == 'train':
            return outputs

        preds = [img_id,
                 fluid.layers.multiclass_nms(
                     bboxes=fluid.layers.concat(boxes, axis=1),
                     scores=fluid.layers.concat(scores, axis=2),
                     score_threshold=self.valid_thresh,
                     nms_top_k=self.nms_topk,
                     keep_top_k=self.nms_posk,
                     nms_threshold=self.nms_thresh,
                     background_label=-1)]

        if self.model_mode == 'test':
            return preds

        # model_mode == "eval"
        return outputs + preds
class YoloLoss(Loss):
    """YOLOv3 loss computed over the first three model outputs.

    Args:
        num_classes (int): class number. Default 80.
        num_max_boxes (int): stored on the instance; not read by
            ``forward``. Default 50.
    """

    def __init__(self, num_classes=80, num_max_boxes=50):
        super(YoloLoss, self).__init__()
        self.num_classes = num_classes
        self.num_max_boxes = num_max_boxes
        self.ignore_thresh = 0.7
        self.anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45,
                        59, 119, 116, 90, 156, 198, 373, 326]
        self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

    def forward(self, outputs, labels):
        """Return one mean loss per output level.

        Args:
            outputs (list): model outputs; only the first three entries
                are used, so any trailing entries are ignored.
            labels: sequence unpacked as ``(gt_box, gt_label, gt_score)``.

        Returns:
            list: the reduced loss of each processed level.
        """
        gt_box, gt_label, gt_score = labels
        level_losses = []
        downsample = 32
        # Pairing with the three anchor masks caps the loop at three levels.
        for anchor_mask, level_out in zip(self.anchor_masks, outputs):
            level_loss = fluid.layers.yolov3_loss(
                x=level_out,
                gt_box=gt_box,
                gt_label=gt_label,
                gt_score=gt_score,
                anchor_mask=anchor_mask,
                downsample_ratio=downsample,
                anchors=self.anchors,
                class_num=self.num_classes,
                ignore_thresh=self.ignore_thresh,
                use_label_smooth=True)
            level_losses.append(fluid.layers.reduce_mean(level_loss))
            downsample //= 2
        return level_losses
def _yolov3_darknet(num_layers=53, num_classes=80,
                    model_mode='train', pretrained=True):
    """Create a YOLOv3 model and optionally load its pre-trained weights.

    Args:
        num_layers (int): DarkNet layer number, used as the key into
            ``pretrain_infos``. Default 53.
        num_classes (int): class number. Default 80.
        model_mode (str): 'train', 'eval' or 'test', forwarded to YOLOv3.
            Default 'train'.
        pretrained (bool): If True, fetch the weights registered in
            ``pretrain_infos`` and load them into the model. Default True.

    Returns:
        YOLOv3: the constructed (and possibly weight-loaded) model.
    """
    model = YOLOv3(num_classes, model_mode)
    if not pretrained:
        return model

    assert num_layers in pretrain_infos, \
        "YOLOv3-DarkNet{} do not have pretrained weights now, " \
        "pretrained should be set as False".format(num_layers)
    weight_path = get_weights_path(*pretrain_infos[num_layers])
    assert weight_path.endswith('.pdparams'), \
        "suffix of weight must be .pdparams"
    # model.load is given the path with the '.pdparams' suffix stripped.
    model.load(weight_path[:-len('.pdparams')])
    return model
def yolov3_darknet53(num_classes=80, model_mode='train', pretrained=True):
    """YOLOv3 model with 53-layer DarkNet as backbone

    Args:
        num_classes (int): class number, default 80.
        model_mode (str): 'train', 'eval' or 'test' mode. The network
            structure differs in the output layer and data: in 'train'
            mode no output layer is appended; in 'eval' and 'test' mode
            the output feature maps are decoded to predictions by
            'fluid.layers.yolo_box'. 'eval' mode returns feature maps and
            predictions, 'test' mode only returns predictions.
            Default 'train'.
        pretrained (bool): If True, returns a model with the pre-trained
            weights registered for the 53-layer backbone loaded,
            default True
    """
    return _yolov3_darknet(
        num_layers=53,
        num_classes=num_classes,
        model_mode=model_mode,
        pretrained=pretrained)
...@@ -129,7 +129,7 @@ class Resize(object): ...@@ -129,7 +129,7 @@ class Resize(object):
self.size = size self.size = size
self.interpolation = interpolation self.interpolation = interpolation
def __call__(self, img): def __call__(self, img, lbl):
""" """
Args: Args:
img (PIL Image): Image to be scaled. img (PIL Image): Image to be scaled.
...@@ -137,7 +137,7 @@ class Resize(object): ...@@ -137,7 +137,7 @@ class Resize(object):
Returns: Returns:
PIL Image: Rescaled image. PIL Image: Rescaled image.
""" """
return F.resize(img, self.size, self.interpolation) return F.resize(img, self.size, self.interpolation), lbl
class RandomResizedCrop(object): class RandomResizedCrop(object):
...@@ -199,10 +199,10 @@ class RandomResizedCrop(object): ...@@ -199,10 +199,10 @@ class RandomResizedCrop(object):
y = (height - h) // 2 y = (height - h) // 2
return x, y, w, h return x, y, w, h
def __call__(self, img): def __call__(self, img, lbl):
x, y, w, h = self._get_params(img) x, y, w, h = self._get_params(img)
cropped_img = img[y:y + h, x:x + w] cropped_img = img[y:y + h, x:x + w]
return F.resize(cropped_img, self.output_size, self.interpolation) return F.resize(cropped_img, self.output_size, self.interpolation), lbl
class CenterCropResize(object): class CenterCropResize(object):
...@@ -230,10 +230,10 @@ class CenterCropResize(object): ...@@ -230,10 +230,10 @@ class CenterCropResize(object):
y = (w + 1 - c) // 2 y = (w + 1 - c) // 2
return c, x, y return c, x, y
def __call__(self, img): def __call__(self, img, lbl):
c, x, y = self._get_params(img) c, x, y = self._get_params(img)
cropped_img = img[x:x + c, y:y + c, :] cropped_img = img[x:x + c, y:y + c, :]
return F.resize(cropped_img, self.size, self.interpolation) return F.resize(cropped_img, self.size, self.interpolation), lbl
class CenterCrop(object): class CenterCrop(object):
...@@ -257,10 +257,10 @@ class CenterCrop(object): ...@@ -257,10 +257,10 @@ class CenterCrop(object):
y = int(round((h - th) / 2.0)) y = int(round((h - th) / 2.0))
return x, y return x, y
def __call__(self, img): def __call__(self, img, lbl):
x, y = self._get_params(img) x, y = self._get_params(img)
th, tw = self.output_size th, tw = self.output_size
return img[y:y + th, x:x + tw] return img[y:y + th, x:x + tw], lbl
class RandomHorizontalFlip(object): class RandomHorizontalFlip(object):
...@@ -273,10 +273,10 @@ class RandomHorizontalFlip(object): ...@@ -273,10 +273,10 @@ class RandomHorizontalFlip(object):
def __init__(self, prob=0.5): def __init__(self, prob=0.5):
self.prob = prob self.prob = prob
def __call__(self, img): def __call__(self, img, lbl):
if np.random.random() < self.prob: if np.random.random() < self.prob:
return F.flip(img, code=1) return F.flip(img, code=1), lbl
return img return img, lbl
class RandomVerticalFlip(object): class RandomVerticalFlip(object):
...@@ -289,10 +289,10 @@ class RandomVerticalFlip(object): ...@@ -289,10 +289,10 @@ class RandomVerticalFlip(object):
def __init__(self, prob=0.5): def __init__(self, prob=0.5):
self.prob = prob self.prob = prob
def __call__(self, img): def __call__(self, img, lbl):
if np.random.random() < self.prob: if np.random.random() < self.prob:
return F.flip(img, code=0) return F.flip(img, code=0), lbl
return img return img, lbl
class Normalize(object): class Normalize(object):
...@@ -317,8 +317,8 @@ class Normalize(object): ...@@ -317,8 +317,8 @@ class Normalize(object):
self.mean = np.array(mean, dtype=np.float32).reshape(len(mean), 1, 1) self.mean = np.array(mean, dtype=np.float32).reshape(len(mean), 1, 1)
self.std = np.array(std, dtype=np.float32).reshape(len(std), 1, 1) self.std = np.array(std, dtype=np.float32).reshape(len(std), 1, 1)
def __call__(self, img): def __call__(self, img, lbl):
return (img - self.mean) / self.std return (img - self.mean) / self.std, lbl
class Permute(object): class Permute(object):
...@@ -337,10 +337,10 @@ class Permute(object): ...@@ -337,10 +337,10 @@ class Permute(object):
], "Only support 'CHW' mode, but received mode: {}".format(mode) ], "Only support 'CHW' mode, but received mode: {}".format(mode)
self.mode = mode self.mode = mode
def __call__(self, img): def __call__(self, img, lbl):
if self.mode == "CHW": if self.mode == "CHW":
return img.transpose((2, 0, 1))[::-1, ...] return img.transpose((2, 0, 1))[::-1, ...], lbl
return img return img, lbl
class GaussianNoise(object): class GaussianNoise(object):
...@@ -356,11 +356,11 @@ class GaussianNoise(object): ...@@ -356,11 +356,11 @@ class GaussianNoise(object):
self.mean = np.array(mean, dtype=np.float32) self.mean = np.array(mean, dtype=np.float32)
self.std = np.array(std, dtype=np.float32) self.std = np.array(std, dtype=np.float32)
def __call__(self, img): def __call__(self, img, lbl):
dtype = img.dtype dtype = img.dtype
noise = np.random.normal(self.mean, self.std, img.shape) * 255 noise = np.random.normal(self.mean, self.std, img.shape) * 255
img = img + noise.astype(np.float32) img = img + noise.astype(np.float32)
return np.clip(img, 0, 255).astype(dtype) return np.clip(img, 0, 255).astype(dtype), lbl
class BrightnessTransform(object): class BrightnessTransform(object):
...@@ -376,15 +376,15 @@ class BrightnessTransform(object): ...@@ -376,15 +376,15 @@ class BrightnessTransform(object):
raise ValueError("brightness value should be non-negative") raise ValueError("brightness value should be non-negative")
self.value = value self.value = value
def __call__(self, img): def __call__(self, img, lbl):
if self.value == 0: if self.value == 0:
return img return img, lbl
dtype = img.dtype dtype = img.dtype
img = img.astype(np.float32) img = img.astype(np.float32)
alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value) alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
img = img * alpha img = img * alpha
return img.clip(0, 255).astype(dtype) return img.clip(0, 255).astype(dtype), lbl
class ContrastTransform(object): class ContrastTransform(object):
...@@ -400,16 +400,16 @@ class ContrastTransform(object): ...@@ -400,16 +400,16 @@ class ContrastTransform(object):
raise ValueError("contrast value should be non-negative") raise ValueError("contrast value should be non-negative")
self.value = value self.value = value
def __call__(self, img): def __call__(self, img, lbl):
if self.value == 0: if self.value == 0:
return img return img, lbl
dtype = img.dtype dtype = img.dtype
img = img.astype(np.float32) img = img.astype(np.float32)
alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value) alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
img = img * alpha + cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).mean() * ( img = img * alpha + cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).mean() * (
1 - alpha) 1 - alpha)
return img.clip(0, 255).astype(dtype) return img.clip(0, 255).astype(dtype), lbl
class SaturationTransform(object): class SaturationTransform(object):
...@@ -425,9 +425,9 @@ class SaturationTransform(object): ...@@ -425,9 +425,9 @@ class SaturationTransform(object):
raise ValueError("saturation value should be non-negative") raise ValueError("saturation value should be non-negative")
self.value = value self.value = value
def __call__(self, img): def __call__(self, img, lbl):
if self.value == 0: if self.value == 0:
return img return img, lbl
dtype = img.dtype dtype = img.dtype
img = img.astype(np.float32) img = img.astype(np.float32)
...@@ -435,7 +435,7 @@ class SaturationTransform(object): ...@@ -435,7 +435,7 @@ class SaturationTransform(object):
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray_img = gray_img[..., np.newaxis] gray_img = gray_img[..., np.newaxis]
img = img * alpha + gray_img * (1 - alpha) img = img * alpha + gray_img * (1 - alpha)
return img.clip(0, 255).astype(dtype) return img.clip(0, 255).astype(dtype), lbl
class HueTransform(object): class HueTransform(object):
...@@ -451,9 +451,9 @@ class HueTransform(object): ...@@ -451,9 +451,9 @@ class HueTransform(object):
raise ValueError("hue value should be in [0.0, 0.5]") raise ValueError("hue value should be in [0.0, 0.5]")
self.value = value self.value = value
def __call__(self, img): def __call__(self, img, lbl):
if self.value == 0: if self.value == 0:
return img return img, lbl
dtype = img.dtype dtype = img.dtype
img = img.astype(np.uint8) img = img.astype(np.uint8)
...@@ -466,7 +466,7 @@ class HueTransform(object): ...@@ -466,7 +466,7 @@ class HueTransform(object):
with np.errstate(over="ignore"): with np.errstate(over="ignore"):
h += np.uint8(alpha * 255) h += np.uint8(alpha * 255)
hsv_img = cv2.merge([h, s, v]) hsv_img = cv2.merge([h, s, v])
return cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR_FULL).astype(dtype) return cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR_FULL).astype(dtype), lbl
class ColorJitter(object): class ColorJitter(object):
...@@ -501,5 +501,5 @@ class ColorJitter(object): ...@@ -501,5 +501,5 @@ class ColorJitter(object):
random.shuffle(transforms) random.shuffle(transforms)
self.transforms = Compose(transforms) self.transforms = Compose(transforms)
def __call__(self, img): def __call__(self, img, lbl):
return self.transforms(img) return self.transforms(img), lbl
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册